4ff4859881f2ba571fd0dfc62309a5e9ab7ffcce
[gcc.git] / gcc / lambda-code.c
1 /* Loop transformation code generation
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dberlin@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "errors.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "timevar.h"
36 #include "cfgloop.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "tree-chrec.h"
40 #include "tree-data-ref.h"
41 #include "tree-pass.h"
42 #include "tree-scalar-evolution.h"
43 #include "vec.h"
44 #include "lambda.h"
45
46 /* This loop nest code generation is based on non-singular matrix
47 math.
48
49 A little terminology and a general sketch of the algorithm. See "A singular
50 loop transformation framework based on non-singular matrices" by Wei Li and
51 Keshav Pingali for formal proofs that the various statements below are
52 correct.
53
54 A loop iteration space are the points traversed by the loop. A point in the
55 iteration space can be represented by a vector of size <loop depth>. You can
56 therefore represent the iteration space as a integral combinations of a set
57 of basis vectors.
58
59 A loop iteration space is dense if every integer point between the loop
60 bounds is a point in the iteration space. Every loop with a step of 1
61 therefore has a dense iteration space.
62
63 for i = 1 to 3, step 1 is a dense iteration space.
64
65 A loop iteration space is sparse if it is not dense. That is, the iteration
66 space skips integer points that are within the loop bounds.
67
68 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
69 2 is skipped.
70
71 Dense source spaces are easy to transform, because they don't skip any
72 points to begin with. Thus we can compute the exact bounds of the target
73 space using min/max and floor/ceil.
74
75 For a dense source space, we take the transformation matrix, decompose it
76 into a lower triangular part (H) and a unimodular part (U).
77 We then compute the auxiliary space from the unimodular part (source loop
78 nest . U = auxiliary space) , which has two important properties:
79 1. It traverses the iterations in the same lexicographic order as the source
80 space.
81 2. It is a dense space when the source is a dense space (even if the target
82 space is going to be sparse).
83
84 Given the auxiliary space, we use the lower triangular part to compute the
85 bounds in the target space by simple matrix multiplication.
86 The gaps in the target space (IE the new loop step sizes) will be the
87 diagonals of the H matrix.
88
89 Sparse source spaces require another step, because you can't directly compute
90 the exact bounds of the auxiliary and target space from the sparse space.
91 Rather than try to come up with a separate algorithm to handle sparse source
92 spaces directly, we just find a legal transformation matrix that gives you
93 the sparse source space, from a dense space, and then transform the dense
94 space.
95
96 For a regular sparse space, you can represent the source space as an integer
97 lattice, and the base space of that lattice will always be dense. Thus, we
98 effectively use the lattice to figure out the transformation from the lattice
99 base space, to the sparse iteration space (IE what transform was applied to
100 the dense space to make it sparse). We then compose this transform with the
101 transformation matrix specified by the user (since our matrix transformations
102 are closed under composition, this is okay). We can then use the base space
103 (which is dense) plus the composed transformation matrix, to compute the rest
104 of the transform using the dense space algorithm above.
105
106 In other words, our sparse source space (B) is decomposed into a dense base
107 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
108 We then compute the composition of L and the user transformation matrix (T),
109 so that T is now a transform from A to the result, instead of from B to the
110 result.
111 IE A.(LT) = result instead of B.T = result
112 Since A is now a dense source space, we can use the dense source space
113 algorithm above to compute the result of applying transform (LT) to A.
114
115 Fourier-Motzkin elimination is used to compute the bounds of the base space
116 of the lattice. */
117
118
119
120 DEF_VEC_GC_P(int);
121
122 static bool perfect_nestify (struct loops *,
123 struct loop *, VEC (tree) *,
124 VEC (tree) *, VEC (int) *, VEC (tree) *);
125 /* Lattice stuff that is internal to the code generation algorithm. */
126
127 typedef struct
128 {
129 /* Lattice base matrix. */
130 lambda_matrix base;
131 /* Lattice dimension. */
132 int dimension;
133 /* Origin vector for the coefficients. */
134 lambda_vector origin;
135 /* Origin matrix for the invariants. */
136 lambda_matrix origin_invariants;
137 /* Number of invariants. */
138 int invariants;
139 } *lambda_lattice;
140
141 #define LATTICE_BASE(T) ((T)->base)
142 #define LATTICE_DIMENSION(T) ((T)->dimension)
143 #define LATTICE_ORIGIN(T) ((T)->origin)
144 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
145 #define LATTICE_INVARIANTS(T) ((T)->invariants)
146
147 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
148 int, int);
149 static lambda_lattice lambda_lattice_new (int, int);
150 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
151
152 static tree find_induction_var_from_exit_cond (struct loop *);
153
154 /* Create a new lambda body vector. */
155
156 lambda_body_vector
157 lambda_body_vector_new (int size)
158 {
159 lambda_body_vector ret;
160
161 ret = ggc_alloc (sizeof (*ret));
162 LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
163 LBV_SIZE (ret) = size;
164 LBV_DENOMINATOR (ret) = 1;
165 return ret;
166 }
167
168 /* Compute the new coefficients for the vector based on the
169 *inverse* of the transformation matrix. */
170
171 lambda_body_vector
172 lambda_body_vector_compute_new (lambda_trans_matrix transform,
173 lambda_body_vector vect)
174 {
175 lambda_body_vector temp;
176 int depth;
177
178 /* Make sure the matrix is square. */
179 gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
180
181 depth = LTM_ROWSIZE (transform);
182
183 temp = lambda_body_vector_new (depth);
184 LBV_DENOMINATOR (temp) =
185 LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
186 lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
187 LTM_MATRIX (transform), depth,
188 LBV_COEFFICIENTS (temp));
189 LBV_SIZE (temp) = LBV_SIZE (vect);
190 return temp;
191 }
192
193 /* Print out a lambda body vector. */
194
195 void
196 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
197 {
198 print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
199 }
200
201 /* Return TRUE if two linear expressions are equal. */
202
203 static bool
204 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
205 int depth, int invariants)
206 {
207 int i;
208
209 if (lle1 == NULL || lle2 == NULL)
210 return false;
211 if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
212 return false;
213 if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
214 return false;
215 for (i = 0; i < depth; i++)
216 if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
217 return false;
218 for (i = 0; i < invariants; i++)
219 if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
220 LLE_INVARIANT_COEFFICIENTS (lle2)[i])
221 return false;
222 return true;
223 }
224
225 /* Create a new linear expression with dimension DIM, and total number
226 of invariants INVARIANTS. */
227
228 lambda_linear_expression
229 lambda_linear_expression_new (int dim, int invariants)
230 {
231 lambda_linear_expression ret;
232
233 ret = ggc_alloc_cleared (sizeof (*ret));
234
235 LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
236 LLE_CONSTANT (ret) = 0;
237 LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
238 LLE_DENOMINATOR (ret) = 1;
239 LLE_NEXT (ret) = NULL;
240
241 return ret;
242 }
243
244 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
245 The starting letter used for variable names is START. */
246
247 static void
248 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
249 char start)
250 {
251 int i;
252 bool first = true;
253 for (i = 0; i < size; i++)
254 {
255 if (expr[i] != 0)
256 {
257 if (first)
258 {
259 if (expr[i] < 0)
260 fprintf (outfile, "-");
261 first = false;
262 }
263 else if (expr[i] > 0)
264 fprintf (outfile, " + ");
265 else
266 fprintf (outfile, " - ");
267 if (abs (expr[i]) == 1)
268 fprintf (outfile, "%c", start + i);
269 else
270 fprintf (outfile, "%d%c", abs (expr[i]), start + i);
271 }
272 }
273 }
274
275 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
276 depth/number of coefficients is given by DEPTH, the number of invariants is
277 given by INVARIANTS, and the character to start variable names with is given
278 by START. */
279
280 void
281 print_lambda_linear_expression (FILE * outfile,
282 lambda_linear_expression expr,
283 int depth, int invariants, char start)
284 {
285 fprintf (outfile, "\tLinear expression: ");
286 print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
287 fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
288 fprintf (outfile, " invariants: ");
289 print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
290 invariants, 'A');
291 fprintf (outfile, " denominator: %d\n", LLE_DENOMINATOR (expr));
292 }
293
294 /* Print a lambda loop structure LOOP to OUTFILE. The depth/number of
295 coefficients is given by DEPTH, the number of invariants is
296 given by INVARIANTS, and the character to start variable names with is given
297 by START. */
298
299 void
300 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
301 int invariants, char start)
302 {
303 int step;
304 lambda_linear_expression expr;
305
306 gcc_assert (loop);
307
308 expr = LL_LINEAR_OFFSET (loop);
309 step = LL_STEP (loop);
310 fprintf (outfile, " step size = %d \n", step);
311
312 if (expr)
313 {
314 fprintf (outfile, " linear offset: \n");
315 print_lambda_linear_expression (outfile, expr, depth, invariants,
316 start);
317 }
318
319 fprintf (outfile, " lower bound: \n");
320 for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
321 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
322 fprintf (outfile, " upper bound: \n");
323 for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
324 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
325 }
326
327 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
328 number of invariants. */
329
330 lambda_loopnest
331 lambda_loopnest_new (int depth, int invariants)
332 {
333 lambda_loopnest ret;
334 ret = ggc_alloc (sizeof (*ret));
335
336 LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
337 LN_DEPTH (ret) = depth;
338 LN_INVARIANTS (ret) = invariants;
339
340 return ret;
341 }
342
343 /* Print a lambda loopnest structure, NEST, to OUTFILE. The starting
344 character to use for loop names is given by START. */
345
346 void
347 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
348 {
349 int i;
350 for (i = 0; i < LN_DEPTH (nest); i++)
351 {
352 fprintf (outfile, "Loop %c\n", start + i);
353 print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
354 LN_INVARIANTS (nest), 'i');
355 fprintf (outfile, "\n");
356 }
357 }
358
359 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
360 of invariants. */
361
362 static lambda_lattice
363 lambda_lattice_new (int depth, int invariants)
364 {
365 lambda_lattice ret;
366 ret = ggc_alloc (sizeof (*ret));
367 LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
368 LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
369 LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
370 LATTICE_DIMENSION (ret) = depth;
371 LATTICE_INVARIANTS (ret) = invariants;
372 return ret;
373 }
374
375 /* Compute the lattice base for NEST. The lattice base is essentially a
376 non-singular transform from a dense base space to a sparse iteration space.
377 We use it so that we don't have to specially handle the case of a sparse
378 iteration space in other parts of the algorithm. As a result, this routine
379 only does something interesting (IE produce a matrix that isn't the
380 identity matrix) if NEST is a sparse space. */
381
382 static lambda_lattice
383 lambda_lattice_compute_base (lambda_loopnest nest)
384 {
385 lambda_lattice ret;
386 int depth, invariants;
387 lambda_matrix base;
388
389 int i, j, step;
390 lambda_loop loop;
391 lambda_linear_expression expression;
392
393 depth = LN_DEPTH (nest);
394 invariants = LN_INVARIANTS (nest);
395
396 ret = lambda_lattice_new (depth, invariants);
397 base = LATTICE_BASE (ret);
398 for (i = 0; i < depth; i++)
399 {
400 loop = LN_LOOPS (nest)[i];
401 gcc_assert (loop);
402 step = LL_STEP (loop);
403 /* If we have a step of 1, then the base is one, and the
404 origin and invariant coefficients are 0. */
405 if (step == 1)
406 {
407 for (j = 0; j < depth; j++)
408 base[i][j] = 0;
409 base[i][i] = 1;
410 LATTICE_ORIGIN (ret)[i] = 0;
411 for (j = 0; j < invariants; j++)
412 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
413 }
414 else
415 {
416 /* Otherwise, we need the lower bound expression (which must
417 be an affine function) to determine the base. */
418 expression = LL_LOWER_BOUND (loop);
419 gcc_assert (expression && LLE_NEXT (expression)
420 && LLE_DENOMINATOR (expression) == 1);
421
422 /* The lower triangular portion of the base is going to be the
423 coefficient times the step */
424 for (j = 0; j < i; j++)
425 base[i][j] = LLE_COEFFICIENTS (expression)[j]
426 * LL_STEP (LN_LOOPS (nest)[j]);
427 base[i][i] = step;
428 for (j = i + 1; j < depth; j++)
429 base[i][j] = 0;
430
431 /* Origin for this loop is the constant of the lower bound
432 expression. */
433 LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
434
435 /* Coefficient for the invariants are equal to the invariant
436 coefficients in the expression. */
437 for (j = 0; j < invariants; j++)
438 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
439 LLE_INVARIANT_COEFFICIENTS (expression)[j];
440 }
441 }
442 return ret;
443 }
444
445 /* Compute the greatest common denominator of two numbers (A and B) using
446 Euclid's algorithm. */
447
448 static int
449 gcd (int a, int b)
450 {
451
452 int x, y, z;
453
454 x = abs (a);
455 y = abs (b);
456
457 while (x > 0)
458 {
459 z = y % x;
460 y = x;
461 x = z;
462 }
463
464 return (y);
465 }
466
467 /* Compute the greatest common denominator of a VECTOR of SIZE numbers. */
468
469 static int
470 gcd_vector (lambda_vector vector, int size)
471 {
472 int i;
473 int gcd1 = 0;
474
475 if (size > 0)
476 {
477 gcd1 = vector[0];
478 for (i = 1; i < size; i++)
479 gcd1 = gcd (gcd1, vector[i]);
480 }
481 return gcd1;
482 }
483
484 /* Compute the least common multiple of two numbers A and B . */
485
486 static int
487 lcm (int a, int b)
488 {
489 return (abs (a) * abs (b) / gcd (a, b));
490 }
491
492 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
493 auxillary nest.
494 Fourier-Motzkin is a way of reducing systems of linear inequality so that
495 it is easy to calculate the answer and bounds.
496 A sketch of how it works:
497 Given a system of linear inequalities, ai * xj >= bk, you can always
498 rewrite the constraints so they are all of the form
499 a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
500 in b1 ... bk, and some a in a1...ai)
501 You can then eliminate this x from the non-constant inequalities by
502 rewriting these as a <= b, x >= constant, and delete the x variable.
503 You can then repeat this for any remaining x variables, and then we have
504 an easy to use variable <= constant (or no variables at all) form that we
505 can construct our bounds from.
506
507 In our case, each time we eliminate, we construct part of the bound from
508 the ith variable, then delete the ith variable.
509
510 Remember the constant are in our vector a, our coefficient matrix is A,
511 and our invariant coefficient matrix is B.
512
513 SIZE is the size of the matrices being passed.
514 DEPTH is the loop nest depth.
515 INVARIANTS is the number of loop invariants.
516 A, B, and a are the coefficient matrix, invariant coefficient, and a
517 vector of constants, respectively. */
518
519 static lambda_loopnest
520 compute_nest_using_fourier_motzkin (int size,
521 int depth,
522 int invariants,
523 lambda_matrix A,
524 lambda_matrix B,
525 lambda_vector a)
526 {
527
528 int multiple, f1, f2;
529 int i, j, k;
530 lambda_linear_expression expression;
531 lambda_loop loop;
532 lambda_loopnest auxillary_nest;
533 lambda_matrix swapmatrix, A1, B1;
534 lambda_vector swapvector, a1;
535 int newsize;
536
537 A1 = lambda_matrix_new (128, depth);
538 B1 = lambda_matrix_new (128, invariants);
539 a1 = lambda_vector_new (128);
540
541 auxillary_nest = lambda_loopnest_new (depth, invariants);
542
543 for (i = depth - 1; i >= 0; i--)
544 {
545 loop = lambda_loop_new ();
546 LN_LOOPS (auxillary_nest)[i] = loop;
547 LL_STEP (loop) = 1;
548
549 for (j = 0; j < size; j++)
550 {
551 if (A[j][i] < 0)
552 {
553 /* Any linear expression in the matrix with a coefficient less
554 than 0 becomes part of the new lower bound. */
555 expression = lambda_linear_expression_new (depth, invariants);
556
557 for (k = 0; k < i; k++)
558 LLE_COEFFICIENTS (expression)[k] = A[j][k];
559
560 for (k = 0; k < invariants; k++)
561 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
562
563 LLE_DENOMINATOR (expression) = -1 * A[j][i];
564 LLE_CONSTANT (expression) = -1 * a[j];
565
566 /* Ignore if identical to the existing lower bound. */
567 if (!lle_equal (LL_LOWER_BOUND (loop),
568 expression, depth, invariants))
569 {
570 LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
571 LL_LOWER_BOUND (loop) = expression;
572 }
573
574 }
575 else if (A[j][i] > 0)
576 {
577 /* Any linear expression with a coefficient greater than 0
578 becomes part of the new upper bound. */
579 expression = lambda_linear_expression_new (depth, invariants);
580 for (k = 0; k < i; k++)
581 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
582
583 for (k = 0; k < invariants; k++)
584 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
585
586 LLE_DENOMINATOR (expression) = A[j][i];
587 LLE_CONSTANT (expression) = a[j];
588
589 /* Ignore if identical to the existing upper bound. */
590 if (!lle_equal (LL_UPPER_BOUND (loop),
591 expression, depth, invariants))
592 {
593 LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
594 LL_UPPER_BOUND (loop) = expression;
595 }
596
597 }
598 }
599
600 /* This portion creates a new system of linear inequalities by deleting
601 the i'th variable, reducing the system by one variable. */
602 newsize = 0;
603 for (j = 0; j < size; j++)
604 {
605 /* If the coefficient for the i'th variable is 0, then we can just
606 eliminate the variable straightaway. Otherwise, we have to
607 multiply through by the coefficients we are eliminating. */
608 if (A[j][i] == 0)
609 {
610 lambda_vector_copy (A[j], A1[newsize], depth);
611 lambda_vector_copy (B[j], B1[newsize], invariants);
612 a1[newsize] = a[j];
613 newsize++;
614 }
615 else if (A[j][i] > 0)
616 {
617 for (k = 0; k < size; k++)
618 {
619 if (A[k][i] < 0)
620 {
621 multiple = lcm (A[j][i], A[k][i]);
622 f1 = multiple / A[j][i];
623 f2 = -1 * multiple / A[k][i];
624
625 lambda_vector_add_mc (A[j], f1, A[k], f2,
626 A1[newsize], depth);
627 lambda_vector_add_mc (B[j], f1, B[k], f2,
628 B1[newsize], invariants);
629 a1[newsize] = f1 * a[j] + f2 * a[k];
630 newsize++;
631 }
632 }
633 }
634 }
635
636 swapmatrix = A;
637 A = A1;
638 A1 = swapmatrix;
639
640 swapmatrix = B;
641 B = B1;
642 B1 = swapmatrix;
643
644 swapvector = a;
645 a = a1;
646 a1 = swapvector;
647
648 size = newsize;
649 }
650
651 return auxillary_nest;
652 }
653
654 /* Compute the loop bounds for the auxiliary space NEST.
655 Input system used is Ax <= b. TRANS is the unimodular transformation. */
656
657 static lambda_loopnest
658 lambda_compute_auxillary_space (lambda_loopnest nest,
659 lambda_trans_matrix trans)
660 {
661 lambda_matrix A, B, A1, B1;
662 lambda_vector a, a1;
663 lambda_matrix invertedtrans;
664 int determinant, depth, invariants, size;
665 int i, j;
666 lambda_loop loop;
667 lambda_linear_expression expression;
668 lambda_lattice lattice;
669
670 depth = LN_DEPTH (nest);
671 invariants = LN_INVARIANTS (nest);
672
673 /* Unfortunately, we can't know the number of constraints we'll have
674 ahead of time, but this should be enough even in ridiculous loop nest
675 cases. We abort if we go over this limit. */
676 A = lambda_matrix_new (128, depth);
677 B = lambda_matrix_new (128, invariants);
678 a = lambda_vector_new (128);
679
680 A1 = lambda_matrix_new (128, depth);
681 B1 = lambda_matrix_new (128, invariants);
682 a1 = lambda_vector_new (128);
683
684 /* Store the bounds in the equation matrix A, constant vector a, and
685 invariant matrix B, so that we have Ax <= a + B.
686 This requires a little equation rearranging so that everything is on the
687 correct side of the inequality. */
688 size = 0;
689 for (i = 0; i < depth; i++)
690 {
691 loop = LN_LOOPS (nest)[i];
692
693 /* First we do the lower bound. */
694 if (LL_STEP (loop) > 0)
695 expression = LL_LOWER_BOUND (loop);
696 else
697 expression = LL_UPPER_BOUND (loop);
698
699 for (; expression != NULL; expression = LLE_NEXT (expression))
700 {
701 /* Fill in the coefficient. */
702 for (j = 0; j < i; j++)
703 A[size][j] = LLE_COEFFICIENTS (expression)[j];
704
705 /* And the invariant coefficient. */
706 for (j = 0; j < invariants; j++)
707 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
708
709 /* And the constant. */
710 a[size] = LLE_CONSTANT (expression);
711
712 /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b. IE put all
713 constants and single variables on */
714 A[size][i] = -1 * LLE_DENOMINATOR (expression);
715 a[size] *= -1;
716 for (j = 0; j < invariants; j++)
717 B[size][j] *= -1;
718
719 size++;
720 /* Need to increase matrix sizes above. */
721 gcc_assert (size <= 127);
722
723 }
724
725 /* Then do the exact same thing for the upper bounds. */
726 if (LL_STEP (loop) > 0)
727 expression = LL_UPPER_BOUND (loop);
728 else
729 expression = LL_LOWER_BOUND (loop);
730
731 for (; expression != NULL; expression = LLE_NEXT (expression))
732 {
733 /* Fill in the coefficient. */
734 for (j = 0; j < i; j++)
735 A[size][j] = LLE_COEFFICIENTS (expression)[j];
736
737 /* And the invariant coefficient. */
738 for (j = 0; j < invariants; j++)
739 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
740
741 /* And the constant. */
742 a[size] = LLE_CONSTANT (expression);
743
744 /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b. */
745 for (j = 0; j < i; j++)
746 A[size][j] *= -1;
747 A[size][i] = LLE_DENOMINATOR (expression);
748 size++;
749 /* Need to increase matrix sizes above. */
750 gcc_assert (size <= 127);
751
752 }
753 }
754
755 /* Compute the lattice base x = base * y + origin, where y is the
756 base space. */
757 lattice = lambda_lattice_compute_base (nest);
758
759 /* Ax <= a + B then becomes ALy <= a+B - A*origin. L is the lattice base */
760
761 /* A1 = A * L */
762 lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
763
764 /* a1 = a - A * origin constant. */
765 lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
766 lambda_vector_add_mc (a, 1, a1, -1, a1, size);
767
768 /* B1 = B - A * origin invariant. */
769 lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
770 invariants);
771 lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
772
773 /* Now compute the auxiliary space bounds by first inverting U, multiplying
774 it by A1, then performing fourier motzkin. */
775
776 invertedtrans = lambda_matrix_new (depth, depth);
777
778 /* Compute the inverse of U. */
779 determinant = lambda_matrix_inverse (LTM_MATRIX (trans),
780 invertedtrans, depth);
781
782 /* A = A1 inv(U). */
783 lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
784
785 return compute_nest_using_fourier_motzkin (size, depth, invariants,
786 A, B1, a1);
787 }
788
789 /* Compute the loop bounds for the target space, using the bounds of
790 the auxiliary nest AUXILLARY_NEST, and the triangular matrix H. This is
791 done by matrix multiplication and then transformation of the new matrix
792 back into linear expression form.
793 Return the target loopnest. */
794
795 static lambda_loopnest
796 lambda_compute_target_space (lambda_loopnest auxillary_nest,
797 lambda_trans_matrix H, lambda_vector stepsigns)
798 {
799 lambda_matrix inverse, H1;
800 int determinant, i, j;
801 int gcd1, gcd2;
802 int factor;
803
804 lambda_loopnest target_nest;
805 int depth, invariants;
806 lambda_matrix target;
807
808 lambda_loop auxillary_loop, target_loop;
809 lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
810
811 depth = LN_DEPTH (auxillary_nest);
812 invariants = LN_INVARIANTS (auxillary_nest);
813
814 inverse = lambda_matrix_new (depth, depth);
815 determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
816
817 /* H1 is H excluding its diagonal. */
818 H1 = lambda_matrix_new (depth, depth);
819 lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
820
821 for (i = 0; i < depth; i++)
822 H1[i][i] = 0;
823
824 /* Computes the linear offsets of the loop bounds. */
825 target = lambda_matrix_new (depth, depth);
826 lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
827
828 target_nest = lambda_loopnest_new (depth, invariants);
829
830 for (i = 0; i < depth; i++)
831 {
832
833 /* Get a new loop structure. */
834 target_loop = lambda_loop_new ();
835 LN_LOOPS (target_nest)[i] = target_loop;
836
837 /* Computes the gcd of the coefficients of the linear part. */
838 gcd1 = gcd_vector (target[i], i);
839
840 /* Include the denominator in the GCD. */
841 gcd1 = gcd (gcd1, determinant);
842
843 /* Now divide through by the gcd. */
844 for (j = 0; j < i; j++)
845 target[i][j] = target[i][j] / gcd1;
846
847 expression = lambda_linear_expression_new (depth, invariants);
848 lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
849 LLE_DENOMINATOR (expression) = determinant / gcd1;
850 LLE_CONSTANT (expression) = 0;
851 lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
852 invariants);
853 LL_LINEAR_OFFSET (target_loop) = expression;
854 }
855
856 /* For each loop, compute the new bounds from H. */
857 for (i = 0; i < depth; i++)
858 {
859 auxillary_loop = LN_LOOPS (auxillary_nest)[i];
860 target_loop = LN_LOOPS (target_nest)[i];
861 LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
862 factor = LTM_MATRIX (H)[i][i];
863
864 /* First we do the lower bound. */
865 auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
866
867 for (; auxillary_expr != NULL;
868 auxillary_expr = LLE_NEXT (auxillary_expr))
869 {
870 target_expr = lambda_linear_expression_new (depth, invariants);
871 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
872 depth, inverse, depth,
873 LLE_COEFFICIENTS (target_expr));
874 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
875 LLE_COEFFICIENTS (target_expr), depth,
876 factor);
877
878 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
879 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
880 LLE_INVARIANT_COEFFICIENTS (target_expr),
881 invariants);
882 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
883 LLE_INVARIANT_COEFFICIENTS (target_expr),
884 invariants, factor);
885 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
886
887 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
888 {
889 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
890 * determinant;
891 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
892 (target_expr),
893 LLE_INVARIANT_COEFFICIENTS
894 (target_expr), invariants,
895 determinant);
896 LLE_DENOMINATOR (target_expr) =
897 LLE_DENOMINATOR (target_expr) * determinant;
898 }
899 /* Find the gcd and divide by it here, rather than doing it
900 at the tree level. */
901 gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
902 gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
903 invariants);
904 gcd1 = gcd (gcd1, gcd2);
905 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
906 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
907 for (j = 0; j < depth; j++)
908 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
909 for (j = 0; j < invariants; j++)
910 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
911 LLE_CONSTANT (target_expr) /= gcd1;
912 LLE_DENOMINATOR (target_expr) /= gcd1;
913 /* Ignore if identical to existing bound. */
914 if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
915 invariants))
916 {
917 LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
918 LL_LOWER_BOUND (target_loop) = target_expr;
919 }
920 }
921 /* Now do the upper bound. */
922 auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
923
924 for (; auxillary_expr != NULL;
925 auxillary_expr = LLE_NEXT (auxillary_expr))
926 {
927 target_expr = lambda_linear_expression_new (depth, invariants);
928 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
929 depth, inverse, depth,
930 LLE_COEFFICIENTS (target_expr));
931 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
932 LLE_COEFFICIENTS (target_expr), depth,
933 factor);
934 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
935 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
936 LLE_INVARIANT_COEFFICIENTS (target_expr),
937 invariants);
938 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
939 LLE_INVARIANT_COEFFICIENTS (target_expr),
940 invariants, factor);
941 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
942
943 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
944 {
945 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
946 * determinant;
947 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
948 (target_expr),
949 LLE_INVARIANT_COEFFICIENTS
950 (target_expr), invariants,
951 determinant);
952 LLE_DENOMINATOR (target_expr) =
953 LLE_DENOMINATOR (target_expr) * determinant;
954 }
955 /* Find the gcd and divide by it here, instead of at the
956 tree level. */
957 gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
958 gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
959 invariants);
960 gcd1 = gcd (gcd1, gcd2);
961 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
962 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
963 for (j = 0; j < depth; j++)
964 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
965 for (j = 0; j < invariants; j++)
966 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
967 LLE_CONSTANT (target_expr) /= gcd1;
968 LLE_DENOMINATOR (target_expr) /= gcd1;
969 /* Ignore if equal to existing bound. */
970 if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
971 invariants))
972 {
973 LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
974 LL_UPPER_BOUND (target_loop) = target_expr;
975 }
976 }
977 }
978 for (i = 0; i < depth; i++)
979 {
980 target_loop = LN_LOOPS (target_nest)[i];
981 /* If necessary, exchange the upper and lower bounds and negate
982 the step size. */
983 if (stepsigns[i] < 0)
984 {
985 LL_STEP (target_loop) *= -1;
986 tmp_expr = LL_LOWER_BOUND (target_loop);
987 LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
988 LL_UPPER_BOUND (target_loop) = tmp_expr;
989 }
990 }
991 return target_nest;
992 }
993
994 /* Compute the step signs of TRANS, using TRANS and stepsigns. Return the new
995 result. */
996
997 static lambda_vector
998 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
999 {
1000 lambda_matrix matrix, H;
1001 int size;
1002 lambda_vector newsteps;
1003 int i, j, factor, minimum_column;
1004 int temp;
1005
1006 matrix = LTM_MATRIX (trans);
1007 size = LTM_ROWSIZE (trans);
1008 H = lambda_matrix_new (size, size);
1009
1010 newsteps = lambda_vector_new (size);
1011 lambda_vector_copy (stepsigns, newsteps, size);
1012
1013 lambda_matrix_copy (matrix, H, size, size);
1014
1015 for (j = 0; j < size; j++)
1016 {
1017 lambda_vector row;
1018 row = H[j];
1019 for (i = j; i < size; i++)
1020 if (row[i] < 0)
1021 lambda_matrix_col_negate (H, size, i);
1022 while (lambda_vector_first_nz (row, size, j + 1) < size)
1023 {
1024 minimum_column = lambda_vector_min_nz (row, size, j);
1025 lambda_matrix_col_exchange (H, size, j, minimum_column);
1026
1027 temp = newsteps[j];
1028 newsteps[j] = newsteps[minimum_column];
1029 newsteps[minimum_column] = temp;
1030
1031 for (i = j + 1; i < size; i++)
1032 {
1033 factor = row[i] / row[j];
1034 lambda_matrix_col_add (H, size, j, i, -1 * factor);
1035 }
1036 }
1037 }
1038 return newsteps;
1039 }
1040
1041 /* Transform NEST according to TRANS, and return the new loopnest.
1042 This involves
1043 1. Computing a lattice base for the transformation
1044 2. Composing the dense base with the specified transformation (TRANS)
1045 3. Decomposing the combined transformation into a lower triangular portion,
1046 and a unimodular portion.
1047 4. Computing the auxillary nest using the unimodular portion.
1048 5. Computing the target nest using the auxillary nest and the lower
1049 triangular portion. */
1050
1051 lambda_loopnest
1052 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1053 {
1054 lambda_loopnest auxillary_nest, target_nest;
1055
1056 int depth, invariants;
1057 int i, j;
1058 lambda_lattice lattice;
1059 lambda_trans_matrix trans1, H, U;
1060 lambda_loop loop;
1061 lambda_linear_expression expression;
1062 lambda_vector origin;
1063 lambda_matrix origin_invariants;
1064 lambda_vector stepsigns;
1065 int f;
1066
1067 depth = LN_DEPTH (nest);
1068 invariants = LN_INVARIANTS (nest);
1069
1070 /* Keep track of the signs of the loop steps. */
1071 stepsigns = lambda_vector_new (depth);
1072 for (i = 0; i < depth; i++)
1073 {
1074 if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1075 stepsigns[i] = 1;
1076 else
1077 stepsigns[i] = -1;
1078 }
1079
1080 /* Compute the lattice base. */
1081 lattice = lambda_lattice_compute_base (nest);
1082 trans1 = lambda_trans_matrix_new (depth, depth);
1083
1084 /* Multiply the transformation matrix by the lattice base. */
1085
1086 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1087 LTM_MATRIX (trans1), depth, depth, depth);
1088
1089 /* Compute the Hermite normal form for the new transformation matrix. */
1090 H = lambda_trans_matrix_new (depth, depth);
1091 U = lambda_trans_matrix_new (depth, depth);
1092 lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1093 LTM_MATRIX (U));
1094
1095 /* Compute the auxiliary loop nest's space from the unimodular
1096 portion. */
1097 auxillary_nest = lambda_compute_auxillary_space (nest, U);
1098
1099 /* Compute the loop step signs from the old step signs and the
1100 transformation matrix. */
1101 stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1102
1103 /* Compute the target loop nest space from the auxiliary nest and
1104 the lower triangular matrix H. */
1105 target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1106 origin = lambda_vector_new (depth);
1107 origin_invariants = lambda_matrix_new (depth, invariants);
1108 lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1109 LATTICE_ORIGIN (lattice), origin);
1110 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1111 origin_invariants, depth, depth, invariants);
1112
1113 for (i = 0; i < depth; i++)
1114 {
1115 loop = LN_LOOPS (target_nest)[i];
1116 expression = LL_LINEAR_OFFSET (loop);
1117 if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1118 f = 1;
1119 else
1120 f = LLE_DENOMINATOR (expression);
1121
1122 LLE_CONSTANT (expression) += f * origin[i];
1123
1124 for (j = 0; j < invariants; j++)
1125 LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1126 f * origin_invariants[i][j];
1127 }
1128
1129 return target_nest;
1130
1131 }
1132
1133 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1134 return the new expression. DEPTH is the depth of the loopnest.
1135 OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1136 in this nest. INVARIANTS is the array of invariants for the loop. EXTRA
1137 is the amount we have to add/subtract from the expression because of the
1138 type of comparison it is used in. */
1139
1140 static lambda_linear_expression
1141 gcc_tree_to_linear_expression (int depth, tree expr,
1142 VEC(tree) *outerinductionvars,
1143 VEC(tree) *invariants, int extra)
1144 {
1145 lambda_linear_expression lle = NULL;
1146 switch (TREE_CODE (expr))
1147 {
1148 case INTEGER_CST:
1149 {
1150 lle = lambda_linear_expression_new (depth, 2 * depth);
1151 LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1152 if (extra != 0)
1153 LLE_CONSTANT (lle) = extra;
1154
1155 LLE_DENOMINATOR (lle) = 1;
1156 }
1157 break;
1158 case SSA_NAME:
1159 {
1160 tree iv, invar;
1161 size_t i;
1162 for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1163 if (iv != NULL)
1164 {
1165 if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1166 {
1167 lle = lambda_linear_expression_new (depth, 2 * depth);
1168 LLE_COEFFICIENTS (lle)[i] = 1;
1169 if (extra != 0)
1170 LLE_CONSTANT (lle) = extra;
1171
1172 LLE_DENOMINATOR (lle) = 1;
1173 }
1174 }
1175 for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1176 if (invar != NULL)
1177 {
1178 if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1179 {
1180 lle = lambda_linear_expression_new (depth, 2 * depth);
1181 LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1182 if (extra != 0)
1183 LLE_CONSTANT (lle) = extra;
1184 LLE_DENOMINATOR (lle) = 1;
1185 }
1186 }
1187 }
1188 break;
1189 default:
1190 return NULL;
1191 }
1192
1193 return lle;
1194 }
1195
1196 /* Return true if OP is invariant in LOOP and all outer loops. */
1197
1198 static bool
1199 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1200 {
1201 if (is_gimple_min_invariant (op))
1202 return true;
1203 if (loop->depth == 0)
1204 return true;
1205 if (!expr_invariant_in_loop_p (loop, op))
1206 return false;
1207 if (loop->outer
1208 && !invariant_in_loop_and_outer_loops (loop->outer, op))
1209 return false;
1210 return true;
1211 }
1212
1213 /* Generate a lambda loop from a gcc loop LOOP. Return the new lambda loop,
1214 or NULL if it could not be converted.
1215 DEPTH is the depth of the loop.
1216 INVARIANTS is a pointer to the array of loop invariants.
1217 The induction variable for this loop should be stored in the parameter
1218 OURINDUCTIONVAR.
1219 OUTERINDUCTIONVARS is an array of induction variables for outer loops. */
1220
1221 static lambda_loop
1222 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1223 VEC (tree) ** invariants,
1224 tree * ourinductionvar,
1225 VEC (tree) * outerinductionvars,
1226 VEC (tree) ** lboundvars,
1227 VEC (tree) ** uboundvars,
1228 VEC (int) ** steps)
1229 {
1230 tree phi;
1231 tree exit_cond;
1232 tree access_fn, inductionvar;
1233 tree step;
1234 lambda_loop lloop = NULL;
1235 lambda_linear_expression lbound, ubound;
1236 tree test;
1237 int stepint;
1238 int extra = 0;
1239 tree lboundvar, uboundvar;
1240 use_optype uses;
1241
1242 /* Find out induction var and exit condition. */
1243 inductionvar = find_induction_var_from_exit_cond (loop);
1244 exit_cond = get_loop_exit_condition (loop);
1245
1246 if (inductionvar == NULL || exit_cond == NULL)
1247 {
1248 if (dump_file && (dump_flags & TDF_DETAILS))
1249 fprintf (dump_file,
1250 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1251 return NULL;
1252 }
1253
1254 test = TREE_OPERAND (exit_cond, 0);
1255
1256 if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1257 {
1258
1259 if (dump_file && (dump_flags & TDF_DETAILS))
1260 fprintf (dump_file,
1261 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1262
1263 return NULL;
1264 }
1265
1266 phi = SSA_NAME_DEF_STMT (inductionvar);
1267 if (TREE_CODE (phi) != PHI_NODE)
1268 {
1269 get_stmt_operands (phi);
1270 uses = STMT_USE_OPS (phi);
1271
1272 if (!uses)
1273 {
1274
1275 if (dump_file && (dump_flags & TDF_DETAILS))
1276 fprintf (dump_file,
1277 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1278
1279 return NULL;
1280 }
1281
1282 phi = USE_OP (uses, 0);
1283 phi = SSA_NAME_DEF_STMT (phi);
1284 if (TREE_CODE (phi) != PHI_NODE)
1285 {
1286
1287 if (dump_file && (dump_flags & TDF_DETAILS))
1288 fprintf (dump_file,
1289 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1290 return NULL;
1291 }
1292
1293 }
1294 /* The induction variable name/version we want to put in the array is the
1295 result of the induction variable phi node. */
1296 *ourinductionvar = PHI_RESULT (phi);
1297 access_fn = instantiate_parameters
1298 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1299 if (!access_fn)
1300 {
1301 if (dump_file && (dump_flags & TDF_DETAILS))
1302 fprintf (dump_file,
1303 "Unable to convert loop: Access function for induction variable phi is NULL\n");
1304
1305 return NULL;
1306 }
1307
1308 step = evolution_part_in_loop_num (access_fn, loop->num);
1309 if (!step || step == chrec_dont_know)
1310 {
1311 if (dump_file && (dump_flags & TDF_DETAILS))
1312 fprintf (dump_file,
1313 "Unable to convert loop: Cannot determine step of loop.\n");
1314
1315 return NULL;
1316 }
1317 if (TREE_CODE (step) != INTEGER_CST)
1318 {
1319
1320 if (dump_file && (dump_flags & TDF_DETAILS))
1321 fprintf (dump_file,
1322 "Unable to convert loop: Step of loop is not integer.\n");
1323 return NULL;
1324 }
1325
1326 stepint = TREE_INT_CST_LOW (step);
1327
1328 /* Only want phis for induction vars, which will have two
1329 arguments. */
1330 if (PHI_NUM_ARGS (phi) != 2)
1331 {
1332 if (dump_file && (dump_flags & TDF_DETAILS))
1333 fprintf (dump_file,
1334 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1335 return NULL;
1336 }
1337
1338 /* Another induction variable check. One argument's source should be
1339 in the loop, one outside the loop. */
1340 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1341 && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1342 {
1343
1344 if (dump_file && (dump_flags & TDF_DETAILS))
1345 fprintf (dump_file,
1346 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1347
1348 return NULL;
1349 }
1350
1351 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1352 {
1353 lboundvar = PHI_ARG_DEF (phi, 1);
1354 lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1355 outerinductionvars, *invariants,
1356 0);
1357 }
1358 else
1359 {
1360 lboundvar = PHI_ARG_DEF (phi, 0);
1361 lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1362 outerinductionvars, *invariants,
1363 0);
1364 }
1365
1366 if (!lbound)
1367 {
1368
1369 if (dump_file && (dump_flags & TDF_DETAILS))
1370 fprintf (dump_file,
1371 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1372
1373 return NULL;
1374 }
1375 /* One part of the test may be a loop invariant tree. */
1376 if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1377 && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1378 VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1));
1379 else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1380 && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1381 VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 0));
1382
1383 /* The non-induction variable part of the test is the upper bound variable.
1384 */
1385 if (TREE_OPERAND (test, 0) == inductionvar)
1386 uboundvar = TREE_OPERAND (test, 1);
1387 else
1388 uboundvar = TREE_OPERAND (test, 0);
1389
1390
1391 /* We only size the vectors assuming we have, at max, 2 times as many
1392 invariants as we do loops (one for each bound).
1393 This is just an arbitrary number, but it has to be matched against the
1394 code below. */
1395 gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1396
1397
1398 /* We might have some leftover. */
1399 if (TREE_CODE (test) == LT_EXPR)
1400 extra = -1 * stepint;
1401 else if (TREE_CODE (test) == NE_EXPR)
1402 extra = -1 * stepint;
1403 else if (TREE_CODE (test) == GT_EXPR)
1404 extra = -1 * stepint;
1405
1406 ubound = gcc_tree_to_linear_expression (depth,
1407 uboundvar,
1408 outerinductionvars,
1409 *invariants, extra);
1410 VEC_safe_push (tree, *uboundvars, build (PLUS_EXPR, integer_type_node,
1411 uboundvar,
1412 build_int_cst (integer_type_node, extra)));
1413 VEC_safe_push (tree, *lboundvars, lboundvar);
1414 VEC_safe_push (int, *steps, stepint);
1415 if (!ubound)
1416 {
1417
1418 if (dump_file && (dump_flags & TDF_DETAILS))
1419 fprintf (dump_file,
1420 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1421 return NULL;
1422 }
1423
1424 lloop = lambda_loop_new ();
1425 LL_STEP (lloop) = stepint;
1426 LL_LOWER_BOUND (lloop) = lbound;
1427 LL_UPPER_BOUND (lloop) = ubound;
1428 return lloop;
1429 }
1430
1431 /* Given a LOOP, find the induction variable it is testing against in the exit
1432 condition. Return the induction variable if found, NULL otherwise. */
1433
1434 static tree
1435 find_induction_var_from_exit_cond (struct loop *loop)
1436 {
1437 tree expr = get_loop_exit_condition (loop);
1438 tree ivarop;
1439 tree test;
1440 if (expr == NULL_TREE)
1441 return NULL_TREE;
1442 if (TREE_CODE (expr) != COND_EXPR)
1443 return NULL_TREE;
1444 test = TREE_OPERAND (expr, 0);
1445 if (!COMPARISON_CLASS_P (test))
1446 return NULL_TREE;
1447 /* This is a guess. We say that for a <,!=,<= b, a is the induction
1448 variable.
1449 For >, >=, we guess b is the induction variable.
1450 If we are wrong, it'll fail the rest of the induction variable tests, and
1451 everything will be fine anyway. */
1452 switch (TREE_CODE (test))
1453 {
1454 case LT_EXPR:
1455 case LE_EXPR:
1456 case NE_EXPR:
1457 ivarop = TREE_OPERAND (test, 0);
1458 break;
1459 case GT_EXPR:
1460 case GE_EXPR:
1461 case EQ_EXPR:
1462 ivarop = TREE_OPERAND (test, 1);
1463 break;
1464 default:
1465 gcc_unreachable();
1466 }
1467 if (TREE_CODE (ivarop) != SSA_NAME)
1468 return NULL_TREE;
1469 return ivarop;
1470 }
1471
1472 DEF_VEC_GC_P(lambda_loop);
1473 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1474 Return the new loop nest.
1475 INDUCTIONVARS is a pointer to an array of induction variables for the
1476 loopnest that will be filled in during this process.
1477 INVARIANTS is a pointer to an array of invariants that will be filled in
1478 during this process. */
1479
1480 lambda_loopnest
1481 gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1482 struct loop * loop_nest,
1483 VEC (tree) **inductionvars,
1484 VEC (tree) **invariants,
1485 bool need_perfect_nest)
1486 {
1487 lambda_loopnest ret;
1488 struct loop *temp;
1489 int depth = 0;
1490 size_t i;
1491 VEC (lambda_loop) *loops;
1492 VEC (tree) *uboundvars;
1493 VEC (tree) *lboundvars;
1494 VEC (int) *steps;
1495 lambda_loop newloop;
1496 tree inductionvar = NULL;
1497
1498 temp = loop_nest;
1499 while (temp)
1500 {
1501 depth++;
1502 temp = temp->inner;
1503 }
1504 loops = VEC_alloc (lambda_loop, 1);
1505 *inductionvars = VEC_alloc (tree, 1);
1506 *invariants = VEC_alloc (tree, 1);
1507 lboundvars = VEC_alloc (tree, 1);
1508 uboundvars = VEC_alloc (tree, 1);
1509 steps = VEC_alloc (int, 1);
1510 temp = loop_nest;
1511 while (temp)
1512 {
1513 newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1514 &inductionvar, *inductionvars,
1515 &lboundvars, &uboundvars,
1516 &steps);
1517 if (!newloop)
1518 return NULL;
1519 VEC_safe_push (tree, *inductionvars, inductionvar);
1520 VEC_safe_push (lambda_loop, loops, newloop);
1521 temp = temp->inner;
1522 }
1523 if (need_perfect_nest
1524 && !perfect_nestify (currloops, loop_nest,
1525 lboundvars, uboundvars, steps, *inductionvars))
1526 {
1527 if (dump_file)
1528 fprintf (dump_file, "Not a perfect nest and couldn't convert to one.\n");
1529 return NULL;
1530 }
1531 ret = lambda_loopnest_new (depth, 2 * depth);
1532 for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1533 LN_LOOPS (ret)[i] = newloop;
1534
1535 return ret;
1536
1537 }
1538
1539 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1540 STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1541 inserted for us are stored. INDUCTION_VARS is the array of induction
1542 variables for the loop this LBV is from. */
1543
1544 static tree
1545 lbv_to_gcc_expression (lambda_body_vector lbv,
1546 VEC (tree) *induction_vars, tree * stmts_to_insert)
1547 {
1548 tree stmts, stmt, resvar, name;
1549 size_t i;
1550 tree_stmt_iterator tsi;
1551
1552 /* Create a statement list and a linear expression temporary. */
1553 stmts = alloc_stmt_list ();
1554 resvar = create_tmp_var (integer_type_node, "lbvtmp");
1555 add_referenced_tmp_var (resvar);
1556
1557 /* Start at 0. */
1558 stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1559 name = make_ssa_name (resvar, stmt);
1560 TREE_OPERAND (stmt, 0) = name;
1561 tsi = tsi_last (stmts);
1562 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1563
1564 for (i = 0; i < VEC_length (tree ,induction_vars) ; i++)
1565 {
1566 if (LBV_COEFFICIENTS (lbv)[i] != 0)
1567 {
1568 tree newname;
1569
1570 /* newname = coefficient * induction_variable */
1571 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1572 fold (build (MULT_EXPR, integer_type_node,
1573 VEC_index (tree, induction_vars, i),
1574 build_int_cst (integer_type_node,
1575 LBV_COEFFICIENTS (lbv)[i]))));
1576 newname = make_ssa_name (resvar, stmt);
1577 TREE_OPERAND (stmt, 0) = newname;
1578 tsi = tsi_last (stmts);
1579 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1580 /* name = name + newname */
1581 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1582 build (PLUS_EXPR, integer_type_node, name, newname));
1583 name = make_ssa_name (resvar, stmt);
1584 TREE_OPERAND (stmt, 0) = name;
1585 tsi = tsi_last (stmts);
1586 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1587 }
1588 }
1589
1590 /* Handle any denominator that occurs. */
1591 if (LBV_DENOMINATOR (lbv) != 1)
1592 {
1593 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1594 build (CEIL_DIV_EXPR, integer_type_node,
1595 name, build_int_cst (integer_type_node,
1596 LBV_DENOMINATOR (lbv))));
1597 name = make_ssa_name (resvar, stmt);
1598 TREE_OPERAND (stmt, 0) = name;
1599 tsi = tsi_last (stmts);
1600 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1601 }
1602 *stmts_to_insert = stmts;
1603 return name;
1604 }
1605
1606 /* Convert a linear expression from coefficient and constant form to a
1607 gcc tree.
1608 Return the tree that represents the final value of the expression.
1609 LLE is the linear expression to convert.
1610 OFFSET is the linear offset to apply to the expression.
1611 INDUCTION_VARS is a vector of induction variables for the loops.
1612 INVARIANTS is a vector of the loop nest invariants.
1613 WRAP specifies what tree code to wrap the results in, if there is more than
1614 one (it is either MAX_EXPR, or MIN_EXPR).
1615 STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1616 statements that need to be inserted for the linear expression. */
1617
1618 static tree
1619 lle_to_gcc_expression (lambda_linear_expression lle,
1620 lambda_linear_expression offset,
1621 VEC(tree) *induction_vars,
1622 VEC(tree) *invariants,
1623 enum tree_code wrap, tree * stmts_to_insert)
1624 {
1625 tree stmts, stmt, resvar, name;
1626 size_t i;
1627 tree_stmt_iterator tsi;
1628 VEC(tree) *results;
1629
1630 name = NULL_TREE;
1631 /* Create a statement list and a linear expression temporary. */
1632 stmts = alloc_stmt_list ();
1633 resvar = create_tmp_var (integer_type_node, "lletmp");
1634 add_referenced_tmp_var (resvar);
1635 results = VEC_alloc (tree, 1);
1636
1637 /* Build up the linear expressions, and put the variable representing the
1638 result in the results array. */
1639 for (; lle != NULL; lle = LLE_NEXT (lle))
1640 {
1641 /* Start at name = 0. */
1642 stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1643 name = make_ssa_name (resvar, stmt);
1644 TREE_OPERAND (stmt, 0) = name;
1645 tsi = tsi_last (stmts);
1646 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1647
1648 /* First do the induction variables.
1649 at the end, name = name + all the induction variables added
1650 together. */
1651 for (i = 0; i < VEC_length (tree ,induction_vars); i++)
1652 {
1653 if (LLE_COEFFICIENTS (lle)[i] != 0)
1654 {
1655 tree newname;
1656 tree mult;
1657 tree coeff;
1658
1659 /* mult = induction variable * coefficient. */
1660 if (LLE_COEFFICIENTS (lle)[i] == 1)
1661 {
1662 mult = VEC_index (tree, induction_vars, i);
1663 }
1664 else
1665 {
1666 coeff = build_int_cst (integer_type_node,
1667 LLE_COEFFICIENTS (lle)[i]);
1668 mult = fold (build (MULT_EXPR, integer_type_node,
1669 VEC_index (tree, induction_vars, i),
1670 coeff));
1671 }
1672
1673 /* newname = mult */
1674 stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1675 newname = make_ssa_name (resvar, stmt);
1676 TREE_OPERAND (stmt, 0) = newname;
1677 tsi = tsi_last (stmts);
1678 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1679
1680 /* name = name + newname */
1681 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1682 build (PLUS_EXPR, integer_type_node,
1683 name, newname));
1684 name = make_ssa_name (resvar, stmt);
1685 TREE_OPERAND (stmt, 0) = name;
1686 tsi = tsi_last (stmts);
1687 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1688 }
1689 }
1690
1691 /* Handle our invariants.
1692 At the end, we have name = name + result of adding all multiplied
1693 invariants. */
1694 for (i = 0; i < VEC_length (tree, invariants); i++)
1695 {
1696 if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1697 {
1698 tree newname;
1699 tree mult;
1700 tree coeff;
1701
1702 /* mult = invariant * coefficient */
1703 if (LLE_INVARIANT_COEFFICIENTS (lle)[i] == 1)
1704 {
1705 mult = VEC_index (tree, invariants, i);
1706 }
1707 else
1708 {
1709 coeff = build_int_cst (integer_type_node,
1710 LLE_INVARIANT_COEFFICIENTS (lle)[i]);
1711 mult = fold (build (MULT_EXPR, integer_type_node,
1712 VEC_index (tree, invariants, i),
1713 coeff));
1714 }
1715
1716 /* newname = mult */
1717 stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1718 newname = make_ssa_name (resvar, stmt);
1719 TREE_OPERAND (stmt, 0) = newname;
1720 tsi = tsi_last (stmts);
1721 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1722
1723 /* name = name + newname */
1724 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1725 build (PLUS_EXPR, integer_type_node,
1726 name, newname));
1727 name = make_ssa_name (resvar, stmt);
1728 TREE_OPERAND (stmt, 0) = name;
1729 tsi = tsi_last (stmts);
1730 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1731 }
1732 }
1733
1734 /* Now handle the constant.
1735 name = name + constant. */
1736 if (LLE_CONSTANT (lle) != 0)
1737 {
1738 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1739 build (PLUS_EXPR, integer_type_node,
1740 name, build_int_cst (integer_type_node,
1741 LLE_CONSTANT (lle))));
1742 name = make_ssa_name (resvar, stmt);
1743 TREE_OPERAND (stmt, 0) = name;
1744 tsi = tsi_last (stmts);
1745 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1746 }
1747
1748 /* Now handle the offset.
1749 name = name + linear offset. */
1750 if (LLE_CONSTANT (offset) != 0)
1751 {
1752 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1753 build (PLUS_EXPR, integer_type_node,
1754 name, build_int_cst (integer_type_node,
1755 LLE_CONSTANT (offset))));
1756 name = make_ssa_name (resvar, stmt);
1757 TREE_OPERAND (stmt, 0) = name;
1758 tsi = tsi_last (stmts);
1759 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1760 }
1761
1762 /* Handle any denominator that occurs. */
1763 if (LLE_DENOMINATOR (lle) != 1)
1764 {
1765 if (wrap == MAX_EXPR)
1766 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1767 build (CEIL_DIV_EXPR, integer_type_node,
1768 name, build_int_cst (integer_type_node,
1769 LLE_DENOMINATOR (lle))));
1770 else if (wrap == MIN_EXPR)
1771 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1772 build (FLOOR_DIV_EXPR, integer_type_node,
1773 name, build_int_cst (integer_type_node,
1774 LLE_DENOMINATOR (lle))));
1775 else
1776 gcc_unreachable();
1777
1778 /* name = {ceil, floor}(name/denominator) */
1779 name = make_ssa_name (resvar, stmt);
1780 TREE_OPERAND (stmt, 0) = name;
1781 tsi = tsi_last (stmts);
1782 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1783 }
1784 VEC_safe_push (tree, results, name);
1785 }
1786
1787 /* Again, out of laziness, we don't handle this case yet. It's not
1788 hard, it just hasn't occurred. */
1789 gcc_assert (VEC_length (tree, results) <= 2);
1790
1791 /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
1792 if (VEC_length (tree, results) > 1)
1793 {
1794 tree op1 = VEC_index (tree, results, 0);
1795 tree op2 = VEC_index (tree, results, 1);
1796 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1797 build (wrap, integer_type_node, op1, op2));
1798 name = make_ssa_name (resvar, stmt);
1799 TREE_OPERAND (stmt, 0) = name;
1800 tsi = tsi_last (stmts);
1801 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1802 }
1803
1804 *stmts_to_insert = stmts;
1805 return name;
1806 }
1807
1808 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1809 it, back into gcc code. This changes the
1810 loops, their induction variables, and their bodies, so that they
1811 match the transformed loopnest.
1812 OLD_LOOPNEST is the loopnest before we've replaced it with the new
1813 loopnest.
1814 OLD_IVS is a vector of induction variables from the old loopnest.
1815 INVARIANTS is a vector of loop invariants from the old loopnest.
1816 NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1817 TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1818 NEW_LOOPNEST. */
1819 void
1820 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1821 VEC(tree) *old_ivs,
1822 VEC(tree) *invariants,
1823 lambda_loopnest new_loopnest,
1824 lambda_trans_matrix transform)
1825 {
1826
1827 struct loop *temp;
1828 size_t i = 0;
1829 size_t depth = 0;
1830 VEC(tree) *new_ivs;
1831 block_stmt_iterator bsi;
1832
1833 if (dump_file)
1834 {
1835 transform = lambda_trans_matrix_inverse (transform);
1836 fprintf (dump_file, "Inverse of transformation matrix:\n");
1837 print_lambda_trans_matrix (dump_file, transform);
1838 }
1839 temp = old_loopnest;
1840 new_ivs = VEC_alloc (tree, 1);
1841 while (temp)
1842 {
1843 temp = temp->inner;
1844 depth++;
1845 }
1846 temp = old_loopnest;
1847
1848 while (temp)
1849 {
1850 lambda_loop newloop;
1851 basic_block bb;
1852 tree ivvar, ivvarinced, exitcond, stmts;
1853 enum tree_code testtype;
1854 tree newupperbound, newlowerbound;
1855 lambda_linear_expression offset;
1856 /* First, build the new induction variable temporary */
1857
1858 ivvar = create_tmp_var (integer_type_node, "lnivtmp");
1859 add_referenced_tmp_var (ivvar);
1860
1861 VEC_safe_push (tree, new_ivs, ivvar);
1862
1863 newloop = LN_LOOPS (new_loopnest)[i];
1864
1865 /* Linear offset is a bit tricky to handle. Punt on the unhandled
1866 cases for now. */
1867 offset = LL_LINEAR_OFFSET (newloop);
1868
1869 gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1870 lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1871
1872 /* Now build the new lower bounds, and insert the statements
1873 necessary to generate it on the loop preheader. */
1874 newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1875 LL_LINEAR_OFFSET (newloop),
1876 new_ivs,
1877 invariants, MAX_EXPR, &stmts);
1878 bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1879 bsi_commit_edge_inserts (NULL);
1880 /* Build the new upper bound and insert its statements in the
1881 basic block of the exit condition */
1882 newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1883 LL_LINEAR_OFFSET (newloop),
1884 new_ivs,
1885 invariants, MIN_EXPR, &stmts);
1886 exitcond = get_loop_exit_condition (temp);
1887 bb = bb_for_stmt (exitcond);
1888 bsi = bsi_start (bb);
1889 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1890
1891 /* Create the new iv, and insert it's increment on the latch
1892 block. */
1893
1894 bb = EDGE_PRED (temp->latch, 0)->src;
1895 bsi = bsi_last (bb);
1896 create_iv (newlowerbound,
1897 build_int_cst (integer_type_node, LL_STEP (newloop)),
1898 ivvar, temp, &bsi, false, &ivvar,
1899 &ivvarinced);
1900
1901 /* Replace the exit condition with the new upper bound
1902 comparison. */
1903 testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1904 COND_EXPR_COND (exitcond) = build (testtype,
1905 boolean_type_node,
1906 ivvarinced, newupperbound);
1907 modify_stmt (exitcond);
1908 VEC_replace (tree, new_ivs, i, ivvar);
1909
1910 i++;
1911 temp = temp->inner;
1912 }
1913
1914 /* Rewrite uses of the old ivs so that they are now specified in terms of
1915 the new ivs. */
1916 temp = old_loopnest;
1917 for (i = 0; i < VEC_length (tree, old_ivs); i++)
1918 {
1919 int j;
1920 tree oldiv = VEC_index (tree, old_ivs, i);
1921 dataflow_t imm = get_immediate_uses (SSA_NAME_DEF_STMT (oldiv));
1922 for (j = 0; j < num_immediate_uses (imm); j++)
1923 {
1924 tree stmt = immediate_use (imm, j);
1925 use_operand_p use_p;
1926 ssa_op_iter iter;
1927 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
1928 {
1929 if (USE_FROM_PTR (use_p) == oldiv)
1930 {
1931 tree newiv, stmts;
1932 lambda_body_vector lbv;
1933 /* Compute the new expression for the induction
1934 variable. */
1935 depth = VEC_length (tree, new_ivs);
1936 lbv = lambda_body_vector_new (depth);
1937 LBV_COEFFICIENTS (lbv)[i] = 1;
1938 lbv = lambda_body_vector_compute_new (transform, lbv);
1939 newiv = lbv_to_gcc_expression (lbv, new_ivs, &stmts);
1940 bsi = bsi_for_stmt (stmt);
1941 /* Insert the statements to build that
1942 expression. */
1943 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1944 propagate_value (use_p, newiv);
1945 modify_stmt (stmt);
1946
1947 }
1948 }
1949 }
1950 }
1951 }
1952
1953
1954 /* Returns true when the vector V is lexicographically positive, in
1955 other words, when the first nonzero element is positive. */
1956
1957 static bool
1958 lambda_vector_lexico_pos (lambda_vector v,
1959 unsigned n)
1960 {
1961 unsigned i;
1962 for (i = 0; i < n; i++)
1963 {
1964 if (v[i] == 0)
1965 continue;
1966 if (v[i] < 0)
1967 return false;
1968 if (v[i] > 0)
1969 return true;
1970 }
1971 return true;
1972 }
1973
1974
1975 /* Return TRUE if this is not interesting statement from the perspective of
1976 determining if we have a perfect loop nest. */
1977
1978 static bool
1979 not_interesting_stmt (tree stmt)
1980 {
1981 /* Note that COND_EXPR's aren't interesting because if they were exiting the
1982 loop, we would have already failed the number of exits tests. */
1983 if (TREE_CODE (stmt) == LABEL_EXPR
1984 || TREE_CODE (stmt) == GOTO_EXPR
1985 || TREE_CODE (stmt) == COND_EXPR)
1986 return true;
1987 return false;
1988 }
1989
1990 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP. */
1991
1992 static bool
1993 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
1994 {
1995 int i;
1996 for (i = 0; i < PHI_NUM_ARGS (phi); i++)
1997 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
1998 if (PHI_ARG_DEF (phi, i) == def)
1999 return true;
2000 return false;
2001 }
2002
2003 /* Return TRUE if STMT is a use of PHI_RESULT. */
2004
2005 static bool
2006 stmt_uses_phi_result (tree stmt, tree phi_result)
2007 {
2008 use_optype uses = STMT_USE_OPS (stmt);
2009
2010 /* This is conservatively true, because we only want SIMPLE bumpers
2011 of the form x +- constant for our pass. */
2012 if (NUM_USES (uses) != 1)
2013 return false;
2014 if (USE_OP (uses, 0) == phi_result)
2015 return true;
2016
2017 return false;
2018 }
2019
2020 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2021 in-loop-edge in a phi node, and the operand it uses is the result of that
2022 phi node.
2023 I.E. i_29 = i_3 + 1
2024 i_3 = PHI (0, i_29); */
2025
2026 static bool
2027 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2028 {
2029 tree use;
2030 tree def;
2031 def_optype defs = STMT_DEF_OPS (stmt);
2032 dataflow_t imm;
2033 int i;
2034
2035 if (NUM_DEFS (defs) != 1)
2036 return false;
2037 def = DEF_OP (defs, 0);
2038 imm = get_immediate_uses (stmt);
2039 for (i = 0; i < num_immediate_uses (imm); i++)
2040 {
2041 use = immediate_use (imm, i);
2042 if (TREE_CODE (use) == PHI_NODE)
2043 {
2044 if (phi_loop_edge_uses_def (loop, use, def))
2045 if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2046 return true;
2047 }
2048 }
2049 return false;
2050 }
2051 /* Return true if LOOP is a perfect loop nest.
2052 Perfect loop nests are those loop nests where all code occurs in the
2053 innermost loop body.
2054 If S is a program statement, then
2055
2056 i.e.
2057 DO I = 1, 20
2058 S1
2059 DO J = 1, 20
2060 ...
2061 END DO
2062 END DO
2063 is not a perfect loop nest because of S1.
2064
2065 DO I = 1, 20
2066 DO J = 1, 20
2067 S1
2068 ...
2069 END DO
2070 END DO
2071 is a perfect loop nest.
2072
2073 Since we don't have high level loops anymore, we basically have to walk our
2074 statements and ignore those that are there because the loop needs them (IE
2075 the induction variable increment, and jump back to the top of the loop). */
2076
2077 bool
2078 perfect_nest_p (struct loop *loop)
2079 {
2080 basic_block *bbs;
2081 size_t i;
2082 tree exit_cond;
2083
2084 if (!loop->inner)
2085 return true;
2086 bbs = get_loop_body (loop);
2087 exit_cond = get_loop_exit_condition (loop);
2088 for (i = 0; i < loop->num_nodes; i++)
2089 {
2090 if (bbs[i]->loop_father == loop)
2091 {
2092 block_stmt_iterator bsi;
2093 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2094 {
2095 tree stmt = bsi_stmt (bsi);
2096 if (stmt == exit_cond
2097 || not_interesting_stmt (stmt)
2098 || stmt_is_bumper_for_loop (loop, stmt))
2099 continue;
2100 free (bbs);
2101 return false;
2102 }
2103 }
2104 }
2105 free (bbs);
2106 /* See if the inner loops are perfectly nested as well. */
2107 if (loop->inner)
2108 return perfect_nest_p (loop->inner);
2109 return true;
2110 }
2111
2112
2113 /* Add phi args using PENDINT_STMT list. */
2114
2115 static void
2116 nestify_update_pending_stmts (edge e)
2117 {
2118 basic_block dest;
2119 tree phi, arg, def;
2120
2121 if (!PENDING_STMT (e))
2122 return;
2123
2124 dest = e->dest;
2125
2126 for (phi = phi_nodes (dest), arg = PENDING_STMT (e);
2127 phi;
2128 phi = TREE_CHAIN (phi), arg = TREE_CHAIN (arg))
2129 {
2130 def = TREE_VALUE (arg);
2131 add_phi_arg (&phi, def, e);
2132 }
2133
2134 PENDING_STMT (e) = NULL;
2135 }
2136
2137 /* Replace the USES of tree X in STMT with tree Y */
2138
2139 static void
2140 replace_uses_of_x_with_y (tree stmt, tree x, tree y)
2141 {
2142 use_optype uses = STMT_USE_OPS (stmt);
2143 size_t i;
2144 for (i = 0; i < NUM_USES (uses); i++)
2145 {
2146 if (USE_OP (uses, i) == x)
2147 SET_USE_OP (uses, i, y);
2148 }
2149 }
2150
2151 /* Return TRUE if STMT uses tree OP in it's uses. */
2152
2153 static bool
2154 stmt_uses_op (tree stmt, tree op)
2155 {
2156 use_optype uses = STMT_USE_OPS (stmt);
2157 size_t i;
2158 for (i = 0; i < NUM_USES (uses); i++)
2159 {
2160 if (USE_OP (uses, i) == op)
2161 return true;
2162 }
2163 return false;
2164 }
2165
2166 /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
2167 one. LOOPIVS is a vector of induction variables, one per loop.
2168 ATM, we only handle imperfect nests of depth 2, where all of the statements
2169 occur after the inner loop. */
2170
2171 static bool
2172 can_convert_to_perfect_nest (struct loop *loop,
2173 VEC (tree) *loopivs)
2174 {
2175 basic_block *bbs;
2176 tree exit_condition;
2177 size_t i;
2178 block_stmt_iterator bsi;
2179
2180 /* Can't handle triply nested+ loops yet. */
2181 if (!loop->inner || loop->inner->inner)
2182 return false;
2183
2184 /* We only handle moving the after-inner-body statements right now, so make
2185 sure all the statements we need to move are located in that position. */
2186 bbs = get_loop_body (loop);
2187 exit_condition = get_loop_exit_condition (loop);
2188 for (i = 0; i < loop->num_nodes; i++)
2189 {
2190 if (bbs[i]->loop_father == loop)
2191 {
2192 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2193 {
2194 size_t j;
2195 tree stmt = bsi_stmt (bsi);
2196 if (stmt == exit_condition
2197 || not_interesting_stmt (stmt)
2198 || stmt_is_bumper_for_loop (loop, stmt))
2199 continue;
2200 /* If the statement uses inner loop ivs, we == screwed. */
2201 for (j = 1; j < VEC_length (tree, loopivs); j++)
2202 if (stmt_uses_op (stmt, VEC_index (tree, loopivs, j)))
2203 {
2204 free (bbs);
2205 return false;
2206 }
2207
2208 /* If the bb of a statement we care about isn't dominated by
2209 the header of the inner loop, then we are also screwed. */
2210 if (!dominated_by_p (CDI_DOMINATORS,
2211 bb_for_stmt (stmt),
2212 loop->inner->header))
2213 {
2214 free (bbs);
2215 return false;
2216 }
2217 }
2218 }
2219 }
2220 return true;
2221 }
2222
2223 /* Transform the loop nest into a perfect nest, if possible.
2224 LOOPS is the current struct loops *
2225 LOOP is the loop nest to transform into a perfect nest
2226 LBOUNDS are the lower bounds for the loops to transform
2227 UBOUNDS are the upper bounds for the loops to transform
2228 STEPS is the STEPS for the loops to transform.
2229 LOOPIVS is the induction variables for the loops to transform.
2230
2231 Basically, for the case of
2232
2233 FOR (i = 0; i < 50; i++)
2234 {
2235 FOR (j =0; j < 50; j++)
2236 {
2237 <whatever>
2238 }
2239 <some code>
2240 }
2241
2242 This function will transform it into a perfect loop nest by splitting the
2243 outer loop into two loops, like so:
2244
2245 FOR (i = 0; i < 50; i++)
2246 {
2247 FOR (j = 0; j < 50; j++)
2248 {
2249 <whatever>
2250 }
2251 }
2252
2253 FOR (i = 0; i < 50; i ++)
2254 {
2255 <some code>
2256 }
2257
2258 Return FALSE if we can't make this loop into a perfect nest. */
2259 static bool
2260 perfect_nestify (struct loops *loops,
2261 struct loop *loop,
2262 VEC (tree) *lbounds,
2263 VEC (tree) *ubounds,
2264 VEC (int) *steps,
2265 VEC (tree) *loopivs)
2266 {
2267 basic_block *bbs;
2268 tree exit_condition;
2269 tree then_label, else_label, cond_stmt;
2270 basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2271 size_t i;
2272 block_stmt_iterator bsi;
2273 edge e;
2274 struct loop *newloop;
2275 tree phi;
2276 tree uboundvar;
2277 tree stmt;
2278 tree ivvar, ivvarinced;
2279 VEC (tree) *phis;
2280
2281 if (!can_convert_to_perfect_nest (loop, loopivs))
2282 return false;
2283
2284 phis = VEC_alloc (tree, 1);
2285
2286 /* Create the new loop */
2287
2288 olddest = loop->single_exit->dest;
2289 preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
2290 headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2291
2292 /* This is done because otherwise, it will release the ssa_name too early
2293 when the edge gets redirected and it will get reused, causing the use of
2294 the phi node to get rewritten. */
2295
2296 for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2297 {
2298 /* These should be simple exit phi copies. */
2299 if (PHI_NUM_ARGS (phi) != 1)
2300 return false;
2301 VEC_safe_push (tree, phis, PHI_RESULT (phi));
2302 VEC_safe_push (tree, phis, PHI_ARG_DEF (phi, 0));
2303 mark_for_rewrite (PHI_RESULT (phi));
2304 }
2305 e = redirect_edge_and_branch (EDGE_SUCC (preheaderbb, 0), headerbb);
2306 unmark_all_for_rewrite ();
2307 bb_ann (olddest)->phi_nodes = NULL;
2308 /* Add back the old exit phis. */
2309 while (VEC_length (tree, phis) != 0)
2310 {
2311 tree def;
2312 tree phiname;
2313 def = VEC_pop (tree, phis);
2314 phiname = VEC_pop (tree, phis);
2315
2316 phi = create_phi_node (phiname, preheaderbb);
2317 add_phi_arg (&phi, def, EDGE_PRED (preheaderbb, 0));
2318 }
2319
2320 nestify_update_pending_stmts (e);
2321 bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2322 latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2323 make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2324 then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2325 else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2326 cond_stmt = build (COND_EXPR, void_type_node,
2327 build (NE_EXPR, boolean_type_node,
2328 integer_one_node,
2329 integer_zero_node),
2330 then_label, else_label);
2331 bsi = bsi_start (bodybb);
2332 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2333 e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2334 make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2335 make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2336
2337 /* Update the loop structures. */
2338 newloop = duplicate_loop (loops, loop, olddest->loop_father);
2339 newloop->header = headerbb;
2340 newloop->latch = latchbb;
2341 newloop->single_exit = e;
2342 add_bb_to_loop (latchbb, newloop);
2343 add_bb_to_loop (bodybb, newloop);
2344 add_bb_to_loop (headerbb, newloop);
2345 add_bb_to_loop (preheaderbb, olddest->loop_father);
2346 set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2347 set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2348 set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2349 loop->single_exit->src);
2350 set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2351 set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2352 /* Create the new iv. */
2353 ivvar = create_tmp_var (integer_type_node, "perfectiv");
2354 add_referenced_tmp_var (ivvar);
2355 bsi = bsi_last (EDGE_PRED (newloop->latch, 0)->src);
2356 create_iv (VEC_index (tree, lbounds, 0),
2357 build_int_cst (integer_type_node,
2358 VEC_index (int, steps, 0)),
2359 ivvar, newloop, &bsi, false, &ivvar, &ivvarinced);
2360
2361 /* Create the new upper bound. This may be not just a variable, so we copy
2362 it to one just in case. */
2363
2364 exit_condition = get_loop_exit_condition (newloop);
2365 uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2366 add_referenced_tmp_var (uboundvar);
2367 stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
2368 VEC_index (tree, ubounds, 0));
2369 uboundvar = make_ssa_name (uboundvar, stmt);
2370 TREE_OPERAND (stmt, 0) = uboundvar;
2371 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2372 COND_EXPR_COND (exit_condition) = build (LE_EXPR,
2373 boolean_type_node,
2374 ivvarinced,
2375 uboundvar);
2376
2377 bbs = get_loop_body (loop);
2378 /* Now replace the induction variable in the moved statements with the
2379 correct loop induction variable. */
2380 for (i = 0; i < loop->num_nodes; i++)
2381 {
2382 block_stmt_iterator tobsi = bsi_last (bodybb);
2383 if (bbs[i]->loop_father == loop)
2384 {
2385 /* Note that the bsi only needs to be explicitly incremented
2386 when we don't move something, since it is automatically
2387 incremented when we do. */
2388 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2389 {
2390 tree stmt = bsi_stmt (bsi);
2391 if (stmt == exit_condition
2392 || not_interesting_stmt (stmt)
2393 || stmt_is_bumper_for_loop (loop, stmt))
2394 {
2395 bsi_next (&bsi);
2396 continue;
2397 }
2398 replace_uses_of_x_with_y (stmt,
2399 VEC_index (tree, loopivs, 0),
2400 ivvar);
2401 bsi_move_before (&bsi, &tobsi);
2402 }
2403 }
2404 }
2405 free (bbs);
2406 flow_loops_find (loops, LOOP_ALL);
2407 return perfect_nest_p (loop);
2408 }
2409
2410 /* Return true if TRANS is a legal transformation matrix that respects
2411 the dependence vectors in DISTS and DIRS. The conservative answer
2412 is false.
2413
2414 "Wolfe proves that a unimodular transformation represented by the
2415 matrix T is legal when applied to a loop nest with a set of
2416 lexicographically non-negative distance vectors RDG if and only if
2417 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2418 i.e.: if and only if it transforms the lexicographically positive
2419 distance vectors to lexicographically positive vectors. Note that
2420 a unimodular matrix must transform the zero vector (and only it) to
2421 the zero vector." S.Muchnick. */
2422
2423 bool
2424 lambda_transform_legal_p (lambda_trans_matrix trans,
2425 int nb_loops,
2426 varray_type dependence_relations)
2427 {
2428 unsigned int i;
2429 lambda_vector distres;
2430 struct data_dependence_relation *ddr;
2431
2432 #if defined ENABLE_CHECKING
2433 if (LTM_COLSIZE (trans) != nb_loops
2434 || LTM_ROWSIZE (trans) != nb_loops)
2435 abort ();
2436 #endif
2437
2438 /* When there is an unknown relation in the dependence_relations, we
2439 know that it is no worth looking at this loop nest: give up. */
2440 ddr = (struct data_dependence_relation *)
2441 VARRAY_GENERIC_PTR (dependence_relations, 0);
2442 if (ddr == NULL)
2443 return true;
2444 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2445 return false;
2446
2447 distres = lambda_vector_new (nb_loops);
2448
2449 /* For each distance vector in the dependence graph. */
2450 for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
2451 {
2452 ddr = (struct data_dependence_relation *)
2453 VARRAY_GENERIC_PTR (dependence_relations, i);
2454
2455
2456
2457 /* Don't care about relations for which we know that there is no
2458 dependence, nor about read-read (aka. output-dependences):
2459 these data accesses can happen in any order. */
2460 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2461 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2462 continue;
2463 /* Conservatively answer: "this transformation is not valid". */
2464 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2465 return false;
2466
2467 /* Compute trans.dist_vect */
2468 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2469 DDR_DIST_VECT (ddr), distres);
2470
2471 if (!lambda_vector_lexico_pos (distres, nb_loops))
2472 return false;
2473 }
2474 return true;
2475 }