re PR preprocessor/36674 (#include location is offset by one row in errors from prepr...
[gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
31
32 The goals of this analysis are:
33
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
37
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
40
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
46
47 - to define a knowledge base for storing the data dependence
48 information,
49
50 - to define an interface to access this data.
51
52
53 Definitions:
54
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
64
65 References:
66
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
73
74
75 */
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "tm.h"
81 #include "ggc.h"
82 #include "tree.h"
83
84 /* These RTL headers are needed for basic-block.h. */
85 #include "rtl.h"
86 #include "basic-block.h"
87 #include "diagnostic.h"
88 #include "tree-flow.h"
89 #include "tree-dump.h"
90 #include "timevar.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "tree-pass.h"
95 #include "langhooks.h"
96
97 static struct datadep_stats
98 {
99 int num_dependence_tests;
100 int num_dependence_dependent;
101 int num_dependence_independent;
102 int num_dependence_undetermined;
103
104 int num_subscript_tests;
105 int num_subscript_undetermined;
106 int num_same_subscript_function;
107
108 int num_ziv;
109 int num_ziv_independent;
110 int num_ziv_dependent;
111 int num_ziv_unimplemented;
112
113 int num_siv;
114 int num_siv_independent;
115 int num_siv_dependent;
116 int num_siv_unimplemented;
117
118 int num_miv;
119 int num_miv_independent;
120 int num_miv_dependent;
121 int num_miv_unimplemented;
122 } dependence_stats;
123
124 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
125 struct data_reference *,
126 struct data_reference *,
127 struct loop *);
128 /* Returns true iff A divides B. */
129
130 static inline bool
131 tree_fold_divides_p (const_tree a, const_tree b)
132 {
133 gcc_assert (TREE_CODE (a) == INTEGER_CST);
134 gcc_assert (TREE_CODE (b) == INTEGER_CST);
135 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
136 }
137
138 /* Returns true iff A divides B. */
139
140 static inline bool
141 int_divides_p (int a, int b)
142 {
143 return ((b % a) == 0);
144 }
145
146 \f
147
148 /* Dump into FILE all the data references from DATAREFS. */
149
150 void
151 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
152 {
153 unsigned int i;
154 struct data_reference *dr;
155
156 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
157 dump_data_reference (file, dr);
158 }
159
160 /* Dump to STDERR all the dependence relations from DDRS. */
161
162 void
163 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
164 {
165 dump_data_dependence_relations (stderr, ddrs);
166 }
167
168 /* Dump into FILE all the dependence relations from DDRS. */
169
170 void
171 dump_data_dependence_relations (FILE *file,
172 VEC (ddr_p, heap) *ddrs)
173 {
174 unsigned int i;
175 struct data_dependence_relation *ddr;
176
177 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
178 dump_data_dependence_relation (file, ddr);
179 }
180
181 /* Dump function for a DATA_REFERENCE structure. */
182
183 void
184 dump_data_reference (FILE *outf,
185 struct data_reference *dr)
186 {
187 unsigned int i;
188
189 fprintf (outf, "(Data Ref: \n stmt: ");
190 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
191 fprintf (outf, " ref: ");
192 print_generic_stmt (outf, DR_REF (dr), 0);
193 fprintf (outf, " base_object: ");
194 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
195
196 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
197 {
198 fprintf (outf, " Access function %d: ", i);
199 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
200 }
201 fprintf (outf, ")\n");
202 }
203
204 /* Dumps the affine function described by FN to the file OUTF. */
205
206 static void
207 dump_affine_function (FILE *outf, affine_fn fn)
208 {
209 unsigned i;
210 tree coef;
211
212 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
213 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
214 {
215 fprintf (outf, " + ");
216 print_generic_expr (outf, coef, TDF_SLIM);
217 fprintf (outf, " * x_%u", i);
218 }
219 }
220
221 /* Dumps the conflict function CF to the file OUTF. */
222
223 static void
224 dump_conflict_function (FILE *outf, conflict_function *cf)
225 {
226 unsigned i;
227
228 if (cf->n == NO_DEPENDENCE)
229 fprintf (outf, "no dependence\n");
230 else if (cf->n == NOT_KNOWN)
231 fprintf (outf, "not known\n");
232 else
233 {
234 for (i = 0; i < cf->n; i++)
235 {
236 fprintf (outf, "[");
237 dump_affine_function (outf, cf->fns[i]);
238 fprintf (outf, "]\n");
239 }
240 }
241 }
242
243 /* Dump function for a SUBSCRIPT structure. */
244
245 void
246 dump_subscript (FILE *outf, struct subscript *subscript)
247 {
248 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
249
250 fprintf (outf, "\n (subscript \n");
251 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
252 dump_conflict_function (outf, cf);
253 if (CF_NONTRIVIAL_P (cf))
254 {
255 tree last_iteration = SUB_LAST_CONFLICT (subscript);
256 fprintf (outf, " last_conflict: ");
257 print_generic_stmt (outf, last_iteration, 0);
258 }
259
260 cf = SUB_CONFLICTS_IN_B (subscript);
261 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
262 dump_conflict_function (outf, cf);
263 if (CF_NONTRIVIAL_P (cf))
264 {
265 tree last_iteration = SUB_LAST_CONFLICT (subscript);
266 fprintf (outf, " last_conflict: ");
267 print_generic_stmt (outf, last_iteration, 0);
268 }
269
270 fprintf (outf, " (Subscript distance: ");
271 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
272 fprintf (outf, " )\n");
273 fprintf (outf, " )\n");
274 }
275
276 /* Print the classic direction vector DIRV to OUTF. */
277
278 void
279 print_direction_vector (FILE *outf,
280 lambda_vector dirv,
281 int length)
282 {
283 int eq;
284
285 for (eq = 0; eq < length; eq++)
286 {
287 enum data_dependence_direction dir = ((enum data_dependence_direction)
288 dirv[eq]);
289
290 switch (dir)
291 {
292 case dir_positive:
293 fprintf (outf, " +");
294 break;
295 case dir_negative:
296 fprintf (outf, " -");
297 break;
298 case dir_equal:
299 fprintf (outf, " =");
300 break;
301 case dir_positive_or_equal:
302 fprintf (outf, " +=");
303 break;
304 case dir_positive_or_negative:
305 fprintf (outf, " +-");
306 break;
307 case dir_negative_or_equal:
308 fprintf (outf, " -=");
309 break;
310 case dir_star:
311 fprintf (outf, " *");
312 break;
313 default:
314 fprintf (outf, "indep");
315 break;
316 }
317 }
318 fprintf (outf, "\n");
319 }
320
321 /* Print a vector of direction vectors. */
322
323 void
324 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
325 int length)
326 {
327 unsigned j;
328 lambda_vector v;
329
330 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
331 print_direction_vector (outf, v, length);
332 }
333
334 /* Print a vector of distance vectors. */
335
336 void
337 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
338 int length)
339 {
340 unsigned j;
341 lambda_vector v;
342
343 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
344 print_lambda_vector (outf, v, length);
345 }
346
347 /* Debug version. */
348
349 void
350 debug_data_dependence_relation (struct data_dependence_relation *ddr)
351 {
352 dump_data_dependence_relation (stderr, ddr);
353 }
354
355 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
356
357 void
358 dump_data_dependence_relation (FILE *outf,
359 struct data_dependence_relation *ddr)
360 {
361 struct data_reference *dra, *drb;
362
363 fprintf (outf, "(Data Dep: \n");
364
365 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
366 {
367 fprintf (outf, " (don't know)\n)\n");
368 return;
369 }
370
371 dra = DDR_A (ddr);
372 drb = DDR_B (ddr);
373 dump_data_reference (outf, dra);
374 dump_data_reference (outf, drb);
375
376 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
377 fprintf (outf, " (no dependence)\n");
378
379 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
380 {
381 unsigned int i;
382 struct loop *loopi;
383
384 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
385 {
386 fprintf (outf, " access_fn_A: ");
387 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
388 fprintf (outf, " access_fn_B: ");
389 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
390 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
391 }
392
393 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
394 fprintf (outf, " loop nest: (");
395 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
396 fprintf (outf, "%d ", loopi->num);
397 fprintf (outf, ")\n");
398
399 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
400 {
401 fprintf (outf, " distance_vector: ");
402 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
403 DDR_NB_LOOPS (ddr));
404 }
405
406 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
407 {
408 fprintf (outf, " direction_vector: ");
409 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
410 DDR_NB_LOOPS (ddr));
411 }
412 }
413
414 fprintf (outf, ")\n");
415 }
416
417 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
418
419 void
420 dump_data_dependence_direction (FILE *file,
421 enum data_dependence_direction dir)
422 {
423 switch (dir)
424 {
425 case dir_positive:
426 fprintf (file, "+");
427 break;
428
429 case dir_negative:
430 fprintf (file, "-");
431 break;
432
433 case dir_equal:
434 fprintf (file, "=");
435 break;
436
437 case dir_positive_or_negative:
438 fprintf (file, "+-");
439 break;
440
441 case dir_positive_or_equal:
442 fprintf (file, "+=");
443 break;
444
445 case dir_negative_or_equal:
446 fprintf (file, "-=");
447 break;
448
449 case dir_star:
450 fprintf (file, "*");
451 break;
452
453 default:
454 break;
455 }
456 }
457
458 /* Dumps the distance and direction vectors in FILE. DDRS contains
459 the dependence relations, and VECT_SIZE is the size of the
460 dependence vectors, or in other words the number of loops in the
461 considered nest. */
462
463 void
464 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
465 {
466 unsigned int i, j;
467 struct data_dependence_relation *ddr;
468 lambda_vector v;
469
470 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
471 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
472 {
473 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
474 {
475 fprintf (file, "DISTANCE_V (");
476 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
477 fprintf (file, ")\n");
478 }
479
480 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
481 {
482 fprintf (file, "DIRECTION_V (");
483 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
484 fprintf (file, ")\n");
485 }
486 }
487
488 fprintf (file, "\n\n");
489 }
490
491 /* Dumps the data dependence relations DDRS in FILE. */
492
493 void
494 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
495 {
496 unsigned int i;
497 struct data_dependence_relation *ddr;
498
499 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
500 dump_data_dependence_relation (file, ddr);
501
502 fprintf (file, "\n\n");
503 }
504
505 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
506 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
507 constant of type ssizetype, and returns true. If we cannot do this
508 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
509 is returned. */
510
511 static bool
512 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
513 tree *var, tree *off)
514 {
515 tree var0, var1;
516 tree off0, off1;
517 enum tree_code ocode = code;
518
519 *var = NULL_TREE;
520 *off = NULL_TREE;
521
522 switch (code)
523 {
524 case INTEGER_CST:
525 *var = build_int_cst (type, 0);
526 *off = fold_convert (ssizetype, op0);
527 return true;
528
529 case POINTER_PLUS_EXPR:
530 ocode = PLUS_EXPR;
531 /* FALLTHROUGH */
532 case PLUS_EXPR:
533 case MINUS_EXPR:
534 split_constant_offset (op0, &var0, &off0);
535 split_constant_offset (op1, &var1, &off1);
536 *var = fold_build2 (code, type, var0, var1);
537 *off = size_binop (ocode, off0, off1);
538 return true;
539
540 case MULT_EXPR:
541 if (TREE_CODE (op1) != INTEGER_CST)
542 return false;
543
544 split_constant_offset (op0, &var0, &off0);
545 *var = fold_build2 (MULT_EXPR, type, var0, op1);
546 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
547 return true;
548
549 case ADDR_EXPR:
550 {
551 tree base, poffset;
552 HOST_WIDE_INT pbitsize, pbitpos;
553 enum machine_mode pmode;
554 int punsignedp, pvolatilep;
555
556 op0 = TREE_OPERAND (op0, 0);
557 if (!handled_component_p (op0))
558 return false;
559
560 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
561 &pmode, &punsignedp, &pvolatilep, false);
562
563 if (pbitpos % BITS_PER_UNIT != 0)
564 return false;
565 base = build_fold_addr_expr (base);
566 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
567
568 if (poffset)
569 {
570 split_constant_offset (poffset, &poffset, &off1);
571 off0 = size_binop (PLUS_EXPR, off0, off1);
572 if (POINTER_TYPE_P (TREE_TYPE (base)))
573 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
574 base, fold_convert (sizetype, poffset));
575 else
576 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
577 fold_convert (TREE_TYPE (base), poffset));
578 }
579
580 var0 = fold_convert (type, base);
581
582 /* If variable length types are involved, punt, otherwise casts
583 might be converted into ARRAY_REFs in gimplify_conversion.
584 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
585 possibly no longer appears in current GIMPLE, might resurface.
586 This perhaps could run
587 if (CONVERT_EXPR_P (var0))
588 {
589 gimplify_conversion (&var0);
590 // Attempt to fill in any within var0 found ARRAY_REF's
591 // element size from corresponding op embedded ARRAY_REF,
592 // if unsuccessful, just punt.
593 } */
594 while (POINTER_TYPE_P (type))
595 type = TREE_TYPE (type);
596 if (int_size_in_bytes (type) < 0)
597 return false;
598
599 *var = var0;
600 *off = off0;
601 return true;
602 }
603
604 case SSA_NAME:
605 {
606 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
607 enum tree_code subcode;
608
609 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
610 return false;
611
612 var0 = gimple_assign_rhs1 (def_stmt);
613 subcode = gimple_assign_rhs_code (def_stmt);
614 var1 = gimple_assign_rhs2 (def_stmt);
615
616 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
617 }
618
619 default:
620 return false;
621 }
622 }
623
624 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
625 will be ssizetype. */
626
627 void
628 split_constant_offset (tree exp, tree *var, tree *off)
629 {
630 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
631 enum tree_code code;
632
633 *var = exp;
634 *off = ssize_int (0);
635 STRIP_NOPS (exp);
636
637 if (automatically_generated_chrec_p (exp))
638 return;
639
640 otype = TREE_TYPE (exp);
641 code = TREE_CODE (exp);
642 extract_ops_from_tree (exp, &code, &op0, &op1);
643 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
644 {
645 *var = fold_convert (type, e);
646 *off = o;
647 }
648 }
649
650 /* Returns the address ADDR of an object in a canonical shape (without nop
651 casts, and with type of pointer to the object). */
652
653 static tree
654 canonicalize_base_object_address (tree addr)
655 {
656 tree orig = addr;
657
658 STRIP_NOPS (addr);
659
660 /* The base address may be obtained by casting from integer, in that case
661 keep the cast. */
662 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
663 return orig;
664
665 if (TREE_CODE (addr) != ADDR_EXPR)
666 return addr;
667
668 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
669 }
670
671 /* Analyzes the behavior of the memory reference DR in the innermost loop that
672 contains it. Returns true if analysis succeed or false otherwise. */
673
674 bool
675 dr_analyze_innermost (struct data_reference *dr)
676 {
677 gimple stmt = DR_STMT (dr);
678 struct loop *loop = loop_containing_stmt (stmt);
679 tree ref = DR_REF (dr);
680 HOST_WIDE_INT pbitsize, pbitpos;
681 tree base, poffset;
682 enum machine_mode pmode;
683 int punsignedp, pvolatilep;
684 affine_iv base_iv, offset_iv;
685 tree init, dinit, step;
686
687 if (dump_file && (dump_flags & TDF_DETAILS))
688 fprintf (dump_file, "analyze_innermost: ");
689
690 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
691 &pmode, &punsignedp, &pvolatilep, false);
692 gcc_assert (base != NULL_TREE);
693
694 if (pbitpos % BITS_PER_UNIT != 0)
695 {
696 if (dump_file && (dump_flags & TDF_DETAILS))
697 fprintf (dump_file, "failed: bit offset alignment.\n");
698 return false;
699 }
700
701 base = build_fold_addr_expr (base);
702 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv, false))
703 {
704 if (dump_file && (dump_flags & TDF_DETAILS))
705 fprintf (dump_file, "failed: evolution of base is not affine.\n");
706 return false;
707 }
708 if (!poffset)
709 {
710 offset_iv.base = ssize_int (0);
711 offset_iv.step = ssize_int (0);
712 }
713 else if (!simple_iv (loop, loop_containing_stmt (stmt),
714 poffset, &offset_iv, false))
715 {
716 if (dump_file && (dump_flags & TDF_DETAILS))
717 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
718 return false;
719 }
720
721 init = ssize_int (pbitpos / BITS_PER_UNIT);
722 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
723 init = size_binop (PLUS_EXPR, init, dinit);
724 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
725 init = size_binop (PLUS_EXPR, init, dinit);
726
727 step = size_binop (PLUS_EXPR,
728 fold_convert (ssizetype, base_iv.step),
729 fold_convert (ssizetype, offset_iv.step));
730
731 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
732
733 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
734 DR_INIT (dr) = init;
735 DR_STEP (dr) = step;
736
737 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
738
739 if (dump_file && (dump_flags & TDF_DETAILS))
740 fprintf (dump_file, "success.\n");
741
742 return true;
743 }
744
745 /* Determines the base object and the list of indices of memory reference
746 DR, analyzed in loop nest NEST. */
747
748 static void
749 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
750 {
751 gimple stmt = DR_STMT (dr);
752 struct loop *loop = loop_containing_stmt (stmt);
753 VEC (tree, heap) *access_fns = NULL;
754 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
755 tree base, off, access_fn;
756 basic_block before_loop = block_before_loop (nest);
757
758 while (handled_component_p (aref))
759 {
760 if (TREE_CODE (aref) == ARRAY_REF)
761 {
762 op = TREE_OPERAND (aref, 1);
763 access_fn = analyze_scalar_evolution (loop, op);
764 access_fn = instantiate_scev (before_loop, loop, access_fn);
765 VEC_safe_push (tree, heap, access_fns, access_fn);
766
767 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
768 }
769
770 aref = TREE_OPERAND (aref, 0);
771 }
772
773 if (INDIRECT_REF_P (aref))
774 {
775 op = TREE_OPERAND (aref, 0);
776 access_fn = analyze_scalar_evolution (loop, op);
777 access_fn = instantiate_scev (before_loop, loop, access_fn);
778 base = initial_condition (access_fn);
779 split_constant_offset (base, &base, &off);
780 access_fn = chrec_replace_initial_condition (access_fn,
781 fold_convert (TREE_TYPE (base), off));
782
783 TREE_OPERAND (aref, 0) = base;
784 VEC_safe_push (tree, heap, access_fns, access_fn);
785 }
786
787 DR_BASE_OBJECT (dr) = ref;
788 DR_ACCESS_FNS (dr) = access_fns;
789 }
790
791 /* Extracts the alias analysis information from the memory reference DR. */
792
793 static void
794 dr_analyze_alias (struct data_reference *dr)
795 {
796 tree ref = DR_REF (dr);
797 tree base = get_base_address (ref), addr;
798
799 if (INDIRECT_REF_P (base))
800 {
801 addr = TREE_OPERAND (base, 0);
802 if (TREE_CODE (addr) == SSA_NAME)
803 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
804 }
805 }
806
807 /* Returns true if the address of DR is invariant. */
808
809 static bool
810 dr_address_invariant_p (struct data_reference *dr)
811 {
812 unsigned i;
813 tree idx;
814
815 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
816 if (tree_contains_chrecs (idx, NULL))
817 return false;
818
819 return true;
820 }
821
822 /* Frees data reference DR. */
823
824 void
825 free_data_ref (data_reference_p dr)
826 {
827 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
828 free (dr);
829 }
830
831 /* Analyzes memory reference MEMREF accessed in STMT. The reference
832 is read if IS_READ is true, write otherwise. Returns the
833 data_reference description of MEMREF. NEST is the outermost loop of the
834 loop nest in that the reference should be analyzed. */
835
836 struct data_reference *
837 create_data_ref (struct loop *nest, tree memref, gimple stmt, bool is_read)
838 {
839 struct data_reference *dr;
840
841 if (dump_file && (dump_flags & TDF_DETAILS))
842 {
843 fprintf (dump_file, "Creating dr for ");
844 print_generic_expr (dump_file, memref, TDF_SLIM);
845 fprintf (dump_file, "\n");
846 }
847
848 dr = XCNEW (struct data_reference);
849 DR_STMT (dr) = stmt;
850 DR_REF (dr) = memref;
851 DR_IS_READ (dr) = is_read;
852
853 dr_analyze_innermost (dr);
854 dr_analyze_indices (dr, nest);
855 dr_analyze_alias (dr);
856
857 if (dump_file && (dump_flags & TDF_DETAILS))
858 {
859 fprintf (dump_file, "\tbase_address: ");
860 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
861 fprintf (dump_file, "\n\toffset from base address: ");
862 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
863 fprintf (dump_file, "\n\tconstant offset from base address: ");
864 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
865 fprintf (dump_file, "\n\tstep: ");
866 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
867 fprintf (dump_file, "\n\taligned to: ");
868 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
869 fprintf (dump_file, "\n\tbase_object: ");
870 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
871 fprintf (dump_file, "\n");
872 }
873
874 return dr;
875 }
876
877 /* Returns true if FNA == FNB. */
878
879 static bool
880 affine_function_equal_p (affine_fn fna, affine_fn fnb)
881 {
882 unsigned i, n = VEC_length (tree, fna);
883
884 if (n != VEC_length (tree, fnb))
885 return false;
886
887 for (i = 0; i < n; i++)
888 if (!operand_equal_p (VEC_index (tree, fna, i),
889 VEC_index (tree, fnb, i), 0))
890 return false;
891
892 return true;
893 }
894
895 /* If all the functions in CF are the same, returns one of them,
896 otherwise returns NULL. */
897
898 static affine_fn
899 common_affine_function (conflict_function *cf)
900 {
901 unsigned i;
902 affine_fn comm;
903
904 if (!CF_NONTRIVIAL_P (cf))
905 return NULL;
906
907 comm = cf->fns[0];
908
909 for (i = 1; i < cf->n; i++)
910 if (!affine_function_equal_p (comm, cf->fns[i]))
911 return NULL;
912
913 return comm;
914 }
915
916 /* Returns the base of the affine function FN. */
917
918 static tree
919 affine_function_base (affine_fn fn)
920 {
921 return VEC_index (tree, fn, 0);
922 }
923
924 /* Returns true if FN is a constant. */
925
926 static bool
927 affine_function_constant_p (affine_fn fn)
928 {
929 unsigned i;
930 tree coef;
931
932 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
933 if (!integer_zerop (coef))
934 return false;
935
936 return true;
937 }
938
939 /* Returns true if FN is the zero constant function. */
940
941 static bool
942 affine_function_zero_p (affine_fn fn)
943 {
944 return (integer_zerop (affine_function_base (fn))
945 && affine_function_constant_p (fn));
946 }
947
948 /* Returns a signed integer type with the largest precision from TA
949 and TB. */
950
951 static tree
952 signed_type_for_types (tree ta, tree tb)
953 {
954 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
955 return signed_type_for (ta);
956 else
957 return signed_type_for (tb);
958 }
959
960 /* Applies operation OP on affine functions FNA and FNB, and returns the
961 result. */
962
963 static affine_fn
964 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
965 {
966 unsigned i, n, m;
967 affine_fn ret;
968 tree coef;
969
970 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
971 {
972 n = VEC_length (tree, fna);
973 m = VEC_length (tree, fnb);
974 }
975 else
976 {
977 n = VEC_length (tree, fnb);
978 m = VEC_length (tree, fna);
979 }
980
981 ret = VEC_alloc (tree, heap, m);
982 for (i = 0; i < n; i++)
983 {
984 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
985 TREE_TYPE (VEC_index (tree, fnb, i)));
986
987 VEC_quick_push (tree, ret,
988 fold_build2 (op, type,
989 VEC_index (tree, fna, i),
990 VEC_index (tree, fnb, i)));
991 }
992
993 for (; VEC_iterate (tree, fna, i, coef); i++)
994 VEC_quick_push (tree, ret,
995 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
996 coef, integer_zero_node));
997 for (; VEC_iterate (tree, fnb, i, coef); i++)
998 VEC_quick_push (tree, ret,
999 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1000 integer_zero_node, coef));
1001
1002 return ret;
1003 }
1004
1005 /* Returns the sum of affine functions FNA and FNB. */
1006
1007 static affine_fn
1008 affine_fn_plus (affine_fn fna, affine_fn fnb)
1009 {
1010 return affine_fn_op (PLUS_EXPR, fna, fnb);
1011 }
1012
1013 /* Returns the difference of affine functions FNA and FNB. */
1014
1015 static affine_fn
1016 affine_fn_minus (affine_fn fna, affine_fn fnb)
1017 {
1018 return affine_fn_op (MINUS_EXPR, fna, fnb);
1019 }
1020
1021 /* Frees affine function FN. */
1022
1023 static void
1024 affine_fn_free (affine_fn fn)
1025 {
1026 VEC_free (tree, heap, fn);
1027 }
1028
1029 /* Determine for each subscript in the data dependence relation DDR
1030 the distance. */
1031
1032 static void
1033 compute_subscript_distance (struct data_dependence_relation *ddr)
1034 {
1035 conflict_function *cf_a, *cf_b;
1036 affine_fn fn_a, fn_b, diff;
1037
1038 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1039 {
1040 unsigned int i;
1041
1042 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1043 {
1044 struct subscript *subscript;
1045
1046 subscript = DDR_SUBSCRIPT (ddr, i);
1047 cf_a = SUB_CONFLICTS_IN_A (subscript);
1048 cf_b = SUB_CONFLICTS_IN_B (subscript);
1049
1050 fn_a = common_affine_function (cf_a);
1051 fn_b = common_affine_function (cf_b);
1052 if (!fn_a || !fn_b)
1053 {
1054 SUB_DISTANCE (subscript) = chrec_dont_know;
1055 return;
1056 }
1057 diff = affine_fn_minus (fn_a, fn_b);
1058
1059 if (affine_function_constant_p (diff))
1060 SUB_DISTANCE (subscript) = affine_function_base (diff);
1061 else
1062 SUB_DISTANCE (subscript) = chrec_dont_know;
1063
1064 affine_fn_free (diff);
1065 }
1066 }
1067 }
1068
1069 /* Returns the conflict function for "unknown". */
1070
1071 static conflict_function *
1072 conflict_fn_not_known (void)
1073 {
1074 conflict_function *fn = XCNEW (conflict_function);
1075 fn->n = NOT_KNOWN;
1076
1077 return fn;
1078 }
1079
1080 /* Returns the conflict function for "independent". */
1081
1082 static conflict_function *
1083 conflict_fn_no_dependence (void)
1084 {
1085 conflict_function *fn = XCNEW (conflict_function);
1086 fn->n = NO_DEPENDENCE;
1087
1088 return fn;
1089 }
1090
1091 /* Returns true if the address of OBJ is invariant in LOOP. */
1092
1093 static bool
1094 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1095 {
1096 while (handled_component_p (obj))
1097 {
1098 if (TREE_CODE (obj) == ARRAY_REF)
1099 {
1100 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1101 need to check the stride and the lower bound of the reference. */
1102 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1103 loop->num)
1104 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1105 loop->num))
1106 return false;
1107 }
1108 else if (TREE_CODE (obj) == COMPONENT_REF)
1109 {
1110 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1111 loop->num))
1112 return false;
1113 }
1114 obj = TREE_OPERAND (obj, 0);
1115 }
1116
1117 if (!INDIRECT_REF_P (obj))
1118 return true;
1119
1120 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1121 loop->num);
1122 }
1123
1124 /* Returns true if A and B are accesses to different objects, or to different
1125 fields of the same object. */
1126
1127 static bool
1128 disjoint_objects_p (tree a, tree b)
1129 {
1130 tree base_a, base_b;
1131 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1132 bool ret;
1133
1134 base_a = get_base_address (a);
1135 base_b = get_base_address (b);
1136
1137 if (DECL_P (base_a)
1138 && DECL_P (base_b)
1139 && base_a != base_b)
1140 return true;
1141
1142 if (!operand_equal_p (base_a, base_b, 0))
1143 return false;
1144
1145 /* Compare the component references of A and B. We must start from the inner
1146 ones, so record them to the vector first. */
1147 while (handled_component_p (a))
1148 {
1149 VEC_safe_push (tree, heap, comp_a, a);
1150 a = TREE_OPERAND (a, 0);
1151 }
1152 while (handled_component_p (b))
1153 {
1154 VEC_safe_push (tree, heap, comp_b, b);
1155 b = TREE_OPERAND (b, 0);
1156 }
1157
1158 ret = false;
1159 while (1)
1160 {
1161 if (VEC_length (tree, comp_a) == 0
1162 || VEC_length (tree, comp_b) == 0)
1163 break;
1164
1165 a = VEC_pop (tree, comp_a);
1166 b = VEC_pop (tree, comp_b);
1167
1168 /* Real and imaginary part of a variable do not alias. */
1169 if ((TREE_CODE (a) == REALPART_EXPR
1170 && TREE_CODE (b) == IMAGPART_EXPR)
1171 || (TREE_CODE (a) == IMAGPART_EXPR
1172 && TREE_CODE (b) == REALPART_EXPR))
1173 {
1174 ret = true;
1175 break;
1176 }
1177
1178 if (TREE_CODE (a) != TREE_CODE (b))
1179 break;
1180
1181 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1182 DR_BASE_OBJECT are always zero. */
1183 if (TREE_CODE (a) == ARRAY_REF)
1184 continue;
1185 else if (TREE_CODE (a) == COMPONENT_REF)
1186 {
1187 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1188 continue;
1189
1190 /* Different fields of unions may overlap. */
1191 base_a = TREE_OPERAND (a, 0);
1192 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1193 break;
1194
1195 /* Different fields of structures cannot. */
1196 ret = true;
1197 break;
1198 }
1199 else
1200 break;
1201 }
1202
1203 VEC_free (tree, heap, comp_a);
1204 VEC_free (tree, heap, comp_b);
1205
1206 return ret;
1207 }
1208
1209 /* Returns false if we can prove that data references A and B do not alias,
1210 true otherwise. */
1211
1212 bool
1213 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1214 {
1215 const_tree addr_a = DR_BASE_ADDRESS (a);
1216 const_tree addr_b = DR_BASE_ADDRESS (b);
1217 const_tree type_a, type_b;
1218 const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1219
1220 /* If the accessed objects are disjoint, the memory references do not
1221 alias. */
1222 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1223 return false;
1224
1225 /* Query the alias oracle. */
1226 if (!refs_may_alias_p (DR_REF (a), DR_REF (b)))
1227 return false;
1228
1229 if (!addr_a || !addr_b)
1230 return true;
1231
1232 /* If the references are based on different static objects, they cannot
1233 alias (PTA should be able to disambiguate such accesses, but often
1234 it fails to). */
1235 if (TREE_CODE (addr_a) == ADDR_EXPR
1236 && TREE_CODE (addr_b) == ADDR_EXPR)
1237 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1238
1239 /* An instruction writing through a restricted pointer is "independent" of any
1240 instruction reading or writing through a different restricted pointer,
1241 in the same block/scope. */
1242
1243 type_a = TREE_TYPE (addr_a);
1244 type_b = TREE_TYPE (addr_b);
1245 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1246
1247 if (TREE_CODE (addr_a) == SSA_NAME)
1248 decl_a = SSA_NAME_VAR (addr_a);
1249 if (TREE_CODE (addr_b) == SSA_NAME)
1250 decl_b = SSA_NAME_VAR (addr_b);
1251
1252 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1253 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1254 && decl_a && DECL_P (decl_a)
1255 && decl_b && DECL_P (decl_b)
1256 && decl_a != decl_b
1257 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1258 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1259 return false;
1260
1261 return true;
1262 }
1263
1264 static void compute_self_dependence (struct data_dependence_relation *);
1265
1266 /* Initialize a data dependence relation between data accesses A and
1267 B. NB_LOOPS is the number of loops surrounding the references: the
1268 size of the classic distance/direction vectors. */
1269
1270 static struct data_dependence_relation *
1271 initialize_data_dependence_relation (struct data_reference *a,
1272 struct data_reference *b,
1273 VEC (loop_p, heap) *loop_nest)
1274 {
1275 struct data_dependence_relation *res;
1276 unsigned int i;
1277
1278 res = XNEW (struct data_dependence_relation);
1279 DDR_A (res) = a;
1280 DDR_B (res) = b;
1281 DDR_LOOP_NEST (res) = NULL;
1282 DDR_REVERSED_P (res) = false;
1283 DDR_SUBSCRIPTS (res) = NULL;
1284 DDR_DIR_VECTS (res) = NULL;
1285 DDR_DIST_VECTS (res) = NULL;
1286
1287 if (a == NULL || b == NULL)
1288 {
1289 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1290 return res;
1291 }
1292
1293 /* If the data references do not alias, then they are independent. */
1294 if (!dr_may_alias_p (a, b))
1295 {
1296 DDR_ARE_DEPENDENT (res) = chrec_known;
1297 return res;
1298 }
1299
1300 /* When the references are exactly the same, don't spend time doing
1301 the data dependence tests, just initialize the ddr and return. */
1302 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1303 {
1304 DDR_AFFINE_P (res) = true;
1305 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1306 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1307 DDR_LOOP_NEST (res) = loop_nest;
1308 DDR_INNER_LOOP (res) = 0;
1309 DDR_SELF_REFERENCE (res) = true;
1310 compute_self_dependence (res);
1311 return res;
1312 }
1313
1314 /* If the references do not access the same object, we do not know
1315 whether they alias or not. */
1316 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1317 {
1318 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1319 return res;
1320 }
1321
1322 /* If the base of the object is not invariant in the loop nest, we cannot
1323 analyze it. TODO -- in fact, it would suffice to record that there may
1324 be arbitrary dependences in the loops where the base object varies. */
1325 if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1326 DR_BASE_OBJECT (a)))
1327 {
1328 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1329 return res;
1330 }
1331
1332 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1333
1334 DDR_AFFINE_P (res) = true;
1335 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1336 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1337 DDR_LOOP_NEST (res) = loop_nest;
1338 DDR_INNER_LOOP (res) = 0;
1339 DDR_SELF_REFERENCE (res) = false;
1340
1341 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1342 {
1343 struct subscript *subscript;
1344
1345 subscript = XNEW (struct subscript);
1346 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1347 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1348 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1349 SUB_DISTANCE (subscript) = chrec_dont_know;
1350 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1351 }
1352
1353 return res;
1354 }
1355
1356 /* Frees memory used by the conflict function F. */
1357
1358 static void
1359 free_conflict_function (conflict_function *f)
1360 {
1361 unsigned i;
1362
1363 if (CF_NONTRIVIAL_P (f))
1364 {
1365 for (i = 0; i < f->n; i++)
1366 affine_fn_free (f->fns[i]);
1367 }
1368 free (f);
1369 }
1370
1371 /* Frees memory used by SUBSCRIPTS. */
1372
1373 static void
1374 free_subscripts (VEC (subscript_p, heap) *subscripts)
1375 {
1376 unsigned i;
1377 subscript_p s;
1378
1379 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1380 {
1381 free_conflict_function (s->conflicting_iterations_in_a);
1382 free_conflict_function (s->conflicting_iterations_in_b);
1383 free (s);
1384 }
1385 VEC_free (subscript_p, heap, subscripts);
1386 }
1387
1388 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1389 description. */
1390
1391 static inline void
1392 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1393 tree chrec)
1394 {
1395 if (dump_file && (dump_flags & TDF_DETAILS))
1396 {
1397 fprintf (dump_file, "(dependence classified: ");
1398 print_generic_expr (dump_file, chrec, 0);
1399 fprintf (dump_file, ")\n");
1400 }
1401
1402 DDR_ARE_DEPENDENT (ddr) = chrec;
1403 free_subscripts (DDR_SUBSCRIPTS (ddr));
1404 DDR_SUBSCRIPTS (ddr) = NULL;
1405 }
1406
1407 /* The dependence relation DDR cannot be represented by a distance
1408 vector. */
1409
1410 static inline void
1411 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1412 {
1413 if (dump_file && (dump_flags & TDF_DETAILS))
1414 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1415
1416 DDR_AFFINE_P (ddr) = false;
1417 }
1418
1419 \f
1420
1421 /* This section contains the classic Banerjee tests. */
1422
1423 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1424 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1425
1426 static inline bool
1427 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1428 {
1429 return (evolution_function_is_constant_p (chrec_a)
1430 && evolution_function_is_constant_p (chrec_b));
1431 }
1432
1433 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1434 variable, i.e., if the SIV (Single Index Variable) test is true. */
1435
1436 static bool
1437 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1438 {
1439 if ((evolution_function_is_constant_p (chrec_a)
1440 && evolution_function_is_univariate_p (chrec_b))
1441 || (evolution_function_is_constant_p (chrec_b)
1442 && evolution_function_is_univariate_p (chrec_a)))
1443 return true;
1444
1445 if (evolution_function_is_univariate_p (chrec_a)
1446 && evolution_function_is_univariate_p (chrec_b))
1447 {
1448 switch (TREE_CODE (chrec_a))
1449 {
1450 case POLYNOMIAL_CHREC:
1451 switch (TREE_CODE (chrec_b))
1452 {
1453 case POLYNOMIAL_CHREC:
1454 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1455 return false;
1456
1457 default:
1458 return true;
1459 }
1460
1461 default:
1462 return true;
1463 }
1464 }
1465
1466 return false;
1467 }
1468
1469 /* Creates a conflict function with N dimensions. The affine functions
1470 in each dimension follow. */
1471
1472 static conflict_function *
1473 conflict_fn (unsigned n, ...)
1474 {
1475 unsigned i;
1476 conflict_function *ret = XCNEW (conflict_function);
1477 va_list ap;
1478
1479 gcc_assert (0 < n && n <= MAX_DIM);
1480 va_start(ap, n);
1481
1482 ret->n = n;
1483 for (i = 0; i < n; i++)
1484 ret->fns[i] = va_arg (ap, affine_fn);
1485 va_end(ap);
1486
1487 return ret;
1488 }
1489
1490 /* Returns constant affine function with value CST. */
1491
1492 static affine_fn
1493 affine_fn_cst (tree cst)
1494 {
1495 affine_fn fn = VEC_alloc (tree, heap, 1);
1496 VEC_quick_push (tree, fn, cst);
1497 return fn;
1498 }
1499
1500 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1501
1502 static affine_fn
1503 affine_fn_univar (tree cst, unsigned dim, tree coef)
1504 {
1505 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1506 unsigned i;
1507
1508 gcc_assert (dim > 0);
1509 VEC_quick_push (tree, fn, cst);
1510 for (i = 1; i < dim; i++)
1511 VEC_quick_push (tree, fn, integer_zero_node);
1512 VEC_quick_push (tree, fn, coef);
1513 return fn;
1514 }
1515
1516 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1517 *OVERLAPS_B are initialized to the functions that describe the
1518 relation between the elements accessed twice by CHREC_A and
1519 CHREC_B. For k >= 0, the following property is verified:
1520
1521 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1522
1523 static void
1524 analyze_ziv_subscript (tree chrec_a,
1525 tree chrec_b,
1526 conflict_function **overlaps_a,
1527 conflict_function **overlaps_b,
1528 tree *last_conflicts)
1529 {
1530 tree type, difference;
1531 dependence_stats.num_ziv++;
1532
1533 if (dump_file && (dump_flags & TDF_DETAILS))
1534 fprintf (dump_file, "(analyze_ziv_subscript \n");
1535
1536 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1537 chrec_a = chrec_convert (type, chrec_a, NULL);
1538 chrec_b = chrec_convert (type, chrec_b, NULL);
1539 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1540
1541 switch (TREE_CODE (difference))
1542 {
1543 case INTEGER_CST:
1544 if (integer_zerop (difference))
1545 {
1546 /* The difference is equal to zero: the accessed index
1547 overlaps for each iteration in the loop. */
1548 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1549 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1550 *last_conflicts = chrec_dont_know;
1551 dependence_stats.num_ziv_dependent++;
1552 }
1553 else
1554 {
1555 /* The accesses do not overlap. */
1556 *overlaps_a = conflict_fn_no_dependence ();
1557 *overlaps_b = conflict_fn_no_dependence ();
1558 *last_conflicts = integer_zero_node;
1559 dependence_stats.num_ziv_independent++;
1560 }
1561 break;
1562
1563 default:
1564 /* We're not sure whether the indexes overlap. For the moment,
1565 conservatively answer "don't know". */
1566 if (dump_file && (dump_flags & TDF_DETAILS))
1567 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1568
1569 *overlaps_a = conflict_fn_not_known ();
1570 *overlaps_b = conflict_fn_not_known ();
1571 *last_conflicts = chrec_dont_know;
1572 dependence_stats.num_ziv_unimplemented++;
1573 break;
1574 }
1575
1576 if (dump_file && (dump_flags & TDF_DETAILS))
1577 fprintf (dump_file, ")\n");
1578 }
1579
1580 /* Sets NIT to the estimated number of executions of the statements in
1581 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1582 large as the number of iterations. If we have no reliable estimate,
1583 the function returns false, otherwise returns true. */
1584
1585 bool
1586 estimated_loop_iterations (struct loop *loop, bool conservative,
1587 double_int *nit)
1588 {
1589 estimate_numbers_of_iterations_loop (loop);
1590 if (conservative)
1591 {
1592 if (!loop->any_upper_bound)
1593 return false;
1594
1595 *nit = loop->nb_iterations_upper_bound;
1596 }
1597 else
1598 {
1599 if (!loop->any_estimate)
1600 return false;
1601
1602 *nit = loop->nb_iterations_estimate;
1603 }
1604
1605 return true;
1606 }
1607
1608 /* Similar to estimated_loop_iterations, but returns the estimate only
1609 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1610 on the number of iterations of LOOP could not be derived, returns -1. */
1611
1612 HOST_WIDE_INT
1613 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1614 {
1615 double_int nit;
1616 HOST_WIDE_INT hwi_nit;
1617
1618 if (!estimated_loop_iterations (loop, conservative, &nit))
1619 return -1;
1620
1621 if (!double_int_fits_in_shwi_p (nit))
1622 return -1;
1623 hwi_nit = double_int_to_shwi (nit);
1624
1625 return hwi_nit < 0 ? -1 : hwi_nit;
1626 }
1627
1628 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1629 and only if it fits to the int type. If this is not the case, or the
1630 estimate on the number of iterations of LOOP could not be derived, returns
1631 chrec_dont_know. */
1632
1633 static tree
1634 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1635 {
1636 double_int nit;
1637 tree type;
1638
1639 if (!estimated_loop_iterations (loop, conservative, &nit))
1640 return chrec_dont_know;
1641
1642 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1643 if (!double_int_fits_to_tree_p (type, nit))
1644 return chrec_dont_know;
1645
1646 return double_int_to_tree (type, nit);
1647 }
1648
1649 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1650 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1651 *OVERLAPS_B are initialized to the functions that describe the
1652 relation between the elements accessed twice by CHREC_A and
1653 CHREC_B. For k >= 0, the following property is verified:
1654
1655 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1656
1657 static void
1658 analyze_siv_subscript_cst_affine (tree chrec_a,
1659 tree chrec_b,
1660 conflict_function **overlaps_a,
1661 conflict_function **overlaps_b,
1662 tree *last_conflicts)
1663 {
1664 bool value0, value1, value2;
1665 tree type, difference, tmp;
1666
1667 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1668 chrec_a = chrec_convert (type, chrec_a, NULL);
1669 chrec_b = chrec_convert (type, chrec_b, NULL);
1670 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1671
1672 if (!chrec_is_positive (initial_condition (difference), &value0))
1673 {
1674 if (dump_file && (dump_flags & TDF_DETAILS))
1675 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1676
1677 dependence_stats.num_siv_unimplemented++;
1678 *overlaps_a = conflict_fn_not_known ();
1679 *overlaps_b = conflict_fn_not_known ();
1680 *last_conflicts = chrec_dont_know;
1681 return;
1682 }
1683 else
1684 {
1685 if (value0 == false)
1686 {
1687 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1688 {
1689 if (dump_file && (dump_flags & TDF_DETAILS))
1690 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1691
1692 *overlaps_a = conflict_fn_not_known ();
1693 *overlaps_b = conflict_fn_not_known ();
1694 *last_conflicts = chrec_dont_know;
1695 dependence_stats.num_siv_unimplemented++;
1696 return;
1697 }
1698 else
1699 {
1700 if (value1 == true)
1701 {
1702 /* Example:
1703 chrec_a = 12
1704 chrec_b = {10, +, 1}
1705 */
1706
1707 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1708 {
1709 HOST_WIDE_INT numiter;
1710 struct loop *loop = get_chrec_loop (chrec_b);
1711
1712 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1713 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1714 fold_build1 (ABS_EXPR, type, difference),
1715 CHREC_RIGHT (chrec_b));
1716 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1717 *last_conflicts = integer_one_node;
1718
1719
1720 /* Perform weak-zero siv test to see if overlap is
1721 outside the loop bounds. */
1722 numiter = estimated_loop_iterations_int (loop, false);
1723
1724 if (numiter >= 0
1725 && compare_tree_int (tmp, numiter) > 0)
1726 {
1727 free_conflict_function (*overlaps_a);
1728 free_conflict_function (*overlaps_b);
1729 *overlaps_a = conflict_fn_no_dependence ();
1730 *overlaps_b = conflict_fn_no_dependence ();
1731 *last_conflicts = integer_zero_node;
1732 dependence_stats.num_siv_independent++;
1733 return;
1734 }
1735 dependence_stats.num_siv_dependent++;
1736 return;
1737 }
1738
1739 /* When the step does not divide the difference, there are
1740 no overlaps. */
1741 else
1742 {
1743 *overlaps_a = conflict_fn_no_dependence ();
1744 *overlaps_b = conflict_fn_no_dependence ();
1745 *last_conflicts = integer_zero_node;
1746 dependence_stats.num_siv_independent++;
1747 return;
1748 }
1749 }
1750
1751 else
1752 {
1753 /* Example:
1754 chrec_a = 12
1755 chrec_b = {10, +, -1}
1756
1757 In this case, chrec_a will not overlap with chrec_b. */
1758 *overlaps_a = conflict_fn_no_dependence ();
1759 *overlaps_b = conflict_fn_no_dependence ();
1760 *last_conflicts = integer_zero_node;
1761 dependence_stats.num_siv_independent++;
1762 return;
1763 }
1764 }
1765 }
1766 else
1767 {
1768 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1769 {
1770 if (dump_file && (dump_flags & TDF_DETAILS))
1771 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1772
1773 *overlaps_a = conflict_fn_not_known ();
1774 *overlaps_b = conflict_fn_not_known ();
1775 *last_conflicts = chrec_dont_know;
1776 dependence_stats.num_siv_unimplemented++;
1777 return;
1778 }
1779 else
1780 {
1781 if (value2 == false)
1782 {
1783 /* Example:
1784 chrec_a = 3
1785 chrec_b = {10, +, -1}
1786 */
1787 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1788 {
1789 HOST_WIDE_INT numiter;
1790 struct loop *loop = get_chrec_loop (chrec_b);
1791
1792 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1793 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1794 CHREC_RIGHT (chrec_b));
1795 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1796 *last_conflicts = integer_one_node;
1797
1798 /* Perform weak-zero siv test to see if overlap is
1799 outside the loop bounds. */
1800 numiter = estimated_loop_iterations_int (loop, false);
1801
1802 if (numiter >= 0
1803 && compare_tree_int (tmp, numiter) > 0)
1804 {
1805 free_conflict_function (*overlaps_a);
1806 free_conflict_function (*overlaps_b);
1807 *overlaps_a = conflict_fn_no_dependence ();
1808 *overlaps_b = conflict_fn_no_dependence ();
1809 *last_conflicts = integer_zero_node;
1810 dependence_stats.num_siv_independent++;
1811 return;
1812 }
1813 dependence_stats.num_siv_dependent++;
1814 return;
1815 }
1816
1817 /* When the step does not divide the difference, there
1818 are no overlaps. */
1819 else
1820 {
1821 *overlaps_a = conflict_fn_no_dependence ();
1822 *overlaps_b = conflict_fn_no_dependence ();
1823 *last_conflicts = integer_zero_node;
1824 dependence_stats.num_siv_independent++;
1825 return;
1826 }
1827 }
1828 else
1829 {
1830 /* Example:
1831 chrec_a = 3
1832 chrec_b = {4, +, 1}
1833
1834 In this case, chrec_a will not overlap with chrec_b. */
1835 *overlaps_a = conflict_fn_no_dependence ();
1836 *overlaps_b = conflict_fn_no_dependence ();
1837 *last_conflicts = integer_zero_node;
1838 dependence_stats.num_siv_independent++;
1839 return;
1840 }
1841 }
1842 }
1843 }
1844 }
1845
1846 /* Helper recursive function for initializing the matrix A. Returns
1847 the initial value of CHREC. */
1848
1849 static tree
1850 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1851 {
1852 gcc_assert (chrec);
1853
1854 switch (TREE_CODE (chrec))
1855 {
1856 case POLYNOMIAL_CHREC:
1857 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1858
1859 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1860 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1861
1862 case PLUS_EXPR:
1863 case MULT_EXPR:
1864 case MINUS_EXPR:
1865 {
1866 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1867 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1868
1869 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1870 }
1871
1872 case NOP_EXPR:
1873 {
1874 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1875 return chrec_convert (chrec_type (chrec), op, NULL);
1876 }
1877
1878 case BIT_NOT_EXPR:
1879 {
1880 /* Handle ~X as -1 - X. */
1881 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1882 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1883 build_int_cst (TREE_TYPE (chrec), -1), op);
1884 }
1885
1886 case INTEGER_CST:
1887 return chrec;
1888
1889 default:
1890 gcc_unreachable ();
1891 return NULL_TREE;
1892 }
1893 }
1894
1895 #define FLOOR_DIV(x,y) ((x) / (y))
1896
1897 /* Solves the special case of the Diophantine equation:
1898 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1899
1900 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1901 number of iterations that loops X and Y run. The overlaps will be
1902 constructed as evolutions in dimension DIM. */
1903
1904 static void
1905 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1906 affine_fn *overlaps_a,
1907 affine_fn *overlaps_b,
1908 tree *last_conflicts, int dim)
1909 {
1910 if (((step_a > 0 && step_b > 0)
1911 || (step_a < 0 && step_b < 0)))
1912 {
1913 int step_overlaps_a, step_overlaps_b;
1914 int gcd_steps_a_b, last_conflict, tau2;
1915
1916 gcd_steps_a_b = gcd (step_a, step_b);
1917 step_overlaps_a = step_b / gcd_steps_a_b;
1918 step_overlaps_b = step_a / gcd_steps_a_b;
1919
1920 if (niter > 0)
1921 {
1922 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1923 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1924 last_conflict = tau2;
1925 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1926 }
1927 else
1928 *last_conflicts = chrec_dont_know;
1929
1930 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1931 build_int_cst (NULL_TREE,
1932 step_overlaps_a));
1933 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1934 build_int_cst (NULL_TREE,
1935 step_overlaps_b));
1936 }
1937
1938 else
1939 {
1940 *overlaps_a = affine_fn_cst (integer_zero_node);
1941 *overlaps_b = affine_fn_cst (integer_zero_node);
1942 *last_conflicts = integer_zero_node;
1943 }
1944 }
1945
1946 /* Solves the special case of a Diophantine equation where CHREC_A is
1947 an affine bivariate function, and CHREC_B is an affine univariate
1948 function. For example,
1949
1950 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1951
1952 has the following overlapping functions:
1953
1954 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1955 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1956 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1957
1958 FORNOW: This is a specialized implementation for a case occurring in
1959 a common benchmark. Implement the general algorithm. */
1960
1961 static void
1962 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1963 conflict_function **overlaps_a,
1964 conflict_function **overlaps_b,
1965 tree *last_conflicts)
1966 {
1967 bool xz_p, yz_p, xyz_p;
1968 int step_x, step_y, step_z;
1969 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1970 affine_fn overlaps_a_xz, overlaps_b_xz;
1971 affine_fn overlaps_a_yz, overlaps_b_yz;
1972 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1973 affine_fn ova1, ova2, ovb;
1974 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1975
1976 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1977 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1978 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1979
1980 niter_x =
1981 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1982 false);
1983 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1984 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1985
1986 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1987 {
1988 if (dump_file && (dump_flags & TDF_DETAILS))
1989 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1990
1991 *overlaps_a = conflict_fn_not_known ();
1992 *overlaps_b = conflict_fn_not_known ();
1993 *last_conflicts = chrec_dont_know;
1994 return;
1995 }
1996
1997 niter = MIN (niter_x, niter_z);
1998 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1999 &overlaps_a_xz,
2000 &overlaps_b_xz,
2001 &last_conflicts_xz, 1);
2002 niter = MIN (niter_y, niter_z);
2003 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2004 &overlaps_a_yz,
2005 &overlaps_b_yz,
2006 &last_conflicts_yz, 2);
2007 niter = MIN (niter_x, niter_z);
2008 niter = MIN (niter_y, niter);
2009 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2010 &overlaps_a_xyz,
2011 &overlaps_b_xyz,
2012 &last_conflicts_xyz, 3);
2013
2014 xz_p = !integer_zerop (last_conflicts_xz);
2015 yz_p = !integer_zerop (last_conflicts_yz);
2016 xyz_p = !integer_zerop (last_conflicts_xyz);
2017
2018 if (xz_p || yz_p || xyz_p)
2019 {
2020 ova1 = affine_fn_cst (integer_zero_node);
2021 ova2 = affine_fn_cst (integer_zero_node);
2022 ovb = affine_fn_cst (integer_zero_node);
2023 if (xz_p)
2024 {
2025 affine_fn t0 = ova1;
2026 affine_fn t2 = ovb;
2027
2028 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2029 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2030 affine_fn_free (t0);
2031 affine_fn_free (t2);
2032 *last_conflicts = last_conflicts_xz;
2033 }
2034 if (yz_p)
2035 {
2036 affine_fn t0 = ova2;
2037 affine_fn t2 = ovb;
2038
2039 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2040 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2041 affine_fn_free (t0);
2042 affine_fn_free (t2);
2043 *last_conflicts = last_conflicts_yz;
2044 }
2045 if (xyz_p)
2046 {
2047 affine_fn t0 = ova1;
2048 affine_fn t2 = ova2;
2049 affine_fn t4 = ovb;
2050
2051 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2052 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2053 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2054 affine_fn_free (t0);
2055 affine_fn_free (t2);
2056 affine_fn_free (t4);
2057 *last_conflicts = last_conflicts_xyz;
2058 }
2059 *overlaps_a = conflict_fn (2, ova1, ova2);
2060 *overlaps_b = conflict_fn (1, ovb);
2061 }
2062 else
2063 {
2064 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2065 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2066 *last_conflicts = integer_zero_node;
2067 }
2068
2069 affine_fn_free (overlaps_a_xz);
2070 affine_fn_free (overlaps_b_xz);
2071 affine_fn_free (overlaps_a_yz);
2072 affine_fn_free (overlaps_b_yz);
2073 affine_fn_free (overlaps_a_xyz);
2074 affine_fn_free (overlaps_b_xyz);
2075 }
2076
2077 /* Determines the overlapping elements due to accesses CHREC_A and
2078 CHREC_B, that are affine functions. This function cannot handle
2079 symbolic evolution functions, ie. when initial conditions are
2080 parameters, because it uses lambda matrices of integers. */
2081
2082 static void
2083 analyze_subscript_affine_affine (tree chrec_a,
2084 tree chrec_b,
2085 conflict_function **overlaps_a,
2086 conflict_function **overlaps_b,
2087 tree *last_conflicts)
2088 {
2089 unsigned nb_vars_a, nb_vars_b, dim;
2090 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2091 lambda_matrix A, U, S;
2092
2093 if (eq_evolutions_p (chrec_a, chrec_b))
2094 {
2095 /* The accessed index overlaps for each iteration in the
2096 loop. */
2097 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2098 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2099 *last_conflicts = chrec_dont_know;
2100 return;
2101 }
2102 if (dump_file && (dump_flags & TDF_DETAILS))
2103 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2104
2105 /* For determining the initial intersection, we have to solve a
2106 Diophantine equation. This is the most time consuming part.
2107
2108 For answering to the question: "Is there a dependence?" we have
2109 to prove that there exists a solution to the Diophantine
2110 equation, and that the solution is in the iteration domain,
2111 i.e. the solution is positive or zero, and that the solution
2112 happens before the upper bound loop.nb_iterations. Otherwise
2113 there is no dependence. This function outputs a description of
2114 the iterations that hold the intersections. */
2115
2116 nb_vars_a = nb_vars_in_chrec (chrec_a);
2117 nb_vars_b = nb_vars_in_chrec (chrec_b);
2118
2119 dim = nb_vars_a + nb_vars_b;
2120 U = lambda_matrix_new (dim, dim);
2121 A = lambda_matrix_new (dim, 1);
2122 S = lambda_matrix_new (dim, 1);
2123
2124 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2125 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2126 gamma = init_b - init_a;
2127
2128 /* Don't do all the hard work of solving the Diophantine equation
2129 when we already know the solution: for example,
2130 | {3, +, 1}_1
2131 | {3, +, 4}_2
2132 | gamma = 3 - 3 = 0.
2133 Then the first overlap occurs during the first iterations:
2134 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2135 */
2136 if (gamma == 0)
2137 {
2138 if (nb_vars_a == 1 && nb_vars_b == 1)
2139 {
2140 HOST_WIDE_INT step_a, step_b;
2141 HOST_WIDE_INT niter, niter_a, niter_b;
2142 affine_fn ova, ovb;
2143
2144 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2145 false);
2146 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2147 false);
2148 niter = MIN (niter_a, niter_b);
2149 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2150 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2151
2152 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2153 &ova, &ovb,
2154 last_conflicts, 1);
2155 *overlaps_a = conflict_fn (1, ova);
2156 *overlaps_b = conflict_fn (1, ovb);
2157 }
2158
2159 else if (nb_vars_a == 2 && nb_vars_b == 1)
2160 compute_overlap_steps_for_affine_1_2
2161 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2162
2163 else if (nb_vars_a == 1 && nb_vars_b == 2)
2164 compute_overlap_steps_for_affine_1_2
2165 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2166
2167 else
2168 {
2169 if (dump_file && (dump_flags & TDF_DETAILS))
2170 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2171 *overlaps_a = conflict_fn_not_known ();
2172 *overlaps_b = conflict_fn_not_known ();
2173 *last_conflicts = chrec_dont_know;
2174 }
2175 goto end_analyze_subs_aa;
2176 }
2177
2178 /* U.A = S */
2179 lambda_matrix_right_hermite (A, dim, 1, S, U);
2180
2181 if (S[0][0] < 0)
2182 {
2183 S[0][0] *= -1;
2184 lambda_matrix_row_negate (U, dim, 0);
2185 }
2186 gcd_alpha_beta = S[0][0];
2187
2188 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2189 but that is a quite strange case. Instead of ICEing, answer
2190 don't know. */
2191 if (gcd_alpha_beta == 0)
2192 {
2193 *overlaps_a = conflict_fn_not_known ();
2194 *overlaps_b = conflict_fn_not_known ();
2195 *last_conflicts = chrec_dont_know;
2196 goto end_analyze_subs_aa;
2197 }
2198
2199 /* The classic "gcd-test". */
2200 if (!int_divides_p (gcd_alpha_beta, gamma))
2201 {
2202 /* The "gcd-test" has determined that there is no integer
2203 solution, i.e. there is no dependence. */
2204 *overlaps_a = conflict_fn_no_dependence ();
2205 *overlaps_b = conflict_fn_no_dependence ();
2206 *last_conflicts = integer_zero_node;
2207 }
2208
2209 /* Both access functions are univariate. This includes SIV and MIV cases. */
2210 else if (nb_vars_a == 1 && nb_vars_b == 1)
2211 {
2212 /* Both functions should have the same evolution sign. */
2213 if (((A[0][0] > 0 && -A[1][0] > 0)
2214 || (A[0][0] < 0 && -A[1][0] < 0)))
2215 {
2216 /* The solutions are given by:
2217 |
2218 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2219 | [u21 u22] [y0]
2220
2221 For a given integer t. Using the following variables,
2222
2223 | i0 = u11 * gamma / gcd_alpha_beta
2224 | j0 = u12 * gamma / gcd_alpha_beta
2225 | i1 = u21
2226 | j1 = u22
2227
2228 the solutions are:
2229
2230 | x0 = i0 + i1 * t,
2231 | y0 = j0 + j1 * t. */
2232 HOST_WIDE_INT i0, j0, i1, j1;
2233
2234 i0 = U[0][0] * gamma / gcd_alpha_beta;
2235 j0 = U[0][1] * gamma / gcd_alpha_beta;
2236 i1 = U[1][0];
2237 j1 = U[1][1];
2238
2239 if ((i1 == 0 && i0 < 0)
2240 || (j1 == 0 && j0 < 0))
2241 {
2242 /* There is no solution.
2243 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2244 falls in here, but for the moment we don't look at the
2245 upper bound of the iteration domain. */
2246 *overlaps_a = conflict_fn_no_dependence ();
2247 *overlaps_b = conflict_fn_no_dependence ();
2248 *last_conflicts = integer_zero_node;
2249 goto end_analyze_subs_aa;
2250 }
2251
2252 if (i1 > 0 && j1 > 0)
2253 {
2254 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2255 (get_chrec_loop (chrec_a), false);
2256 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2257 (get_chrec_loop (chrec_b), false);
2258 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2259
2260 /* (X0, Y0) is a solution of the Diophantine equation:
2261 "chrec_a (X0) = chrec_b (Y0)". */
2262 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2263 CEIL (-j0, j1));
2264 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2265 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2266
2267 /* (X1, Y1) is the smallest positive solution of the eq
2268 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2269 first conflict occurs. */
2270 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2271 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2272 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2273
2274 if (niter > 0)
2275 {
2276 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2277 FLOOR_DIV (niter - j0, j1));
2278 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2279
2280 /* If the overlap occurs outside of the bounds of the
2281 loop, there is no dependence. */
2282 if (x1 >= niter || y1 >= niter)
2283 {
2284 *overlaps_a = conflict_fn_no_dependence ();
2285 *overlaps_b = conflict_fn_no_dependence ();
2286 *last_conflicts = integer_zero_node;
2287 goto end_analyze_subs_aa;
2288 }
2289 else
2290 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2291 }
2292 else
2293 *last_conflicts = chrec_dont_know;
2294
2295 *overlaps_a
2296 = conflict_fn (1,
2297 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2298 1,
2299 build_int_cst (NULL_TREE, i1)));
2300 *overlaps_b
2301 = conflict_fn (1,
2302 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2303 1,
2304 build_int_cst (NULL_TREE, j1)));
2305 }
2306 else
2307 {
2308 /* FIXME: For the moment, the upper bound of the
2309 iteration domain for i and j is not checked. */
2310 if (dump_file && (dump_flags & TDF_DETAILS))
2311 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2312 *overlaps_a = conflict_fn_not_known ();
2313 *overlaps_b = conflict_fn_not_known ();
2314 *last_conflicts = chrec_dont_know;
2315 }
2316 }
2317 else
2318 {
2319 if (dump_file && (dump_flags & TDF_DETAILS))
2320 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2321 *overlaps_a = conflict_fn_not_known ();
2322 *overlaps_b = conflict_fn_not_known ();
2323 *last_conflicts = chrec_dont_know;
2324 }
2325 }
2326 else
2327 {
2328 if (dump_file && (dump_flags & TDF_DETAILS))
2329 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2330 *overlaps_a = conflict_fn_not_known ();
2331 *overlaps_b = conflict_fn_not_known ();
2332 *last_conflicts = chrec_dont_know;
2333 }
2334
2335 end_analyze_subs_aa:
2336 if (dump_file && (dump_flags & TDF_DETAILS))
2337 {
2338 fprintf (dump_file, " (overlaps_a = ");
2339 dump_conflict_function (dump_file, *overlaps_a);
2340 fprintf (dump_file, ")\n (overlaps_b = ");
2341 dump_conflict_function (dump_file, *overlaps_b);
2342 fprintf (dump_file, ")\n");
2343 fprintf (dump_file, ")\n");
2344 }
2345 }
2346
2347 /* Returns true when analyze_subscript_affine_affine can be used for
2348 determining the dependence relation between chrec_a and chrec_b,
2349 that contain symbols. This function modifies chrec_a and chrec_b
2350 such that the analysis result is the same, and such that they don't
2351 contain symbols, and then can safely be passed to the analyzer.
2352
2353 Example: The analysis of the following tuples of evolutions produce
2354 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2355 vs. {0, +, 1}_1
2356
2357 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2358 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2359 */
2360
2361 static bool
2362 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2363 {
2364 tree diff, type, left_a, left_b, right_b;
2365
2366 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2367 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2368 /* FIXME: For the moment not handled. Might be refined later. */
2369 return false;
2370
2371 type = chrec_type (*chrec_a);
2372 left_a = CHREC_LEFT (*chrec_a);
2373 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2374 diff = chrec_fold_minus (type, left_a, left_b);
2375
2376 if (!evolution_function_is_constant_p (diff))
2377 return false;
2378
2379 if (dump_file && (dump_flags & TDF_DETAILS))
2380 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2381
2382 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2383 diff, CHREC_RIGHT (*chrec_a));
2384 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2385 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2386 build_int_cst (type, 0),
2387 right_b);
2388 return true;
2389 }
2390
2391 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2392 *OVERLAPS_B are initialized to the functions that describe the
2393 relation between the elements accessed twice by CHREC_A and
2394 CHREC_B. For k >= 0, the following property is verified:
2395
2396 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2397
2398 static void
2399 analyze_siv_subscript (tree chrec_a,
2400 tree chrec_b,
2401 conflict_function **overlaps_a,
2402 conflict_function **overlaps_b,
2403 tree *last_conflicts,
2404 int loop_nest_num)
2405 {
2406 dependence_stats.num_siv++;
2407
2408 if (dump_file && (dump_flags & TDF_DETAILS))
2409 fprintf (dump_file, "(analyze_siv_subscript \n");
2410
2411 if (evolution_function_is_constant_p (chrec_a)
2412 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2413 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2414 overlaps_a, overlaps_b, last_conflicts);
2415
2416 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2417 && evolution_function_is_constant_p (chrec_b))
2418 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2419 overlaps_b, overlaps_a, last_conflicts);
2420
2421 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2422 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2423 {
2424 if (!chrec_contains_symbols (chrec_a)
2425 && !chrec_contains_symbols (chrec_b))
2426 {
2427 analyze_subscript_affine_affine (chrec_a, chrec_b,
2428 overlaps_a, overlaps_b,
2429 last_conflicts);
2430
2431 if (CF_NOT_KNOWN_P (*overlaps_a)
2432 || CF_NOT_KNOWN_P (*overlaps_b))
2433 dependence_stats.num_siv_unimplemented++;
2434 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2435 || CF_NO_DEPENDENCE_P (*overlaps_b))
2436 dependence_stats.num_siv_independent++;
2437 else
2438 dependence_stats.num_siv_dependent++;
2439 }
2440 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2441 &chrec_b))
2442 {
2443 analyze_subscript_affine_affine (chrec_a, chrec_b,
2444 overlaps_a, overlaps_b,
2445 last_conflicts);
2446
2447 if (CF_NOT_KNOWN_P (*overlaps_a)
2448 || CF_NOT_KNOWN_P (*overlaps_b))
2449 dependence_stats.num_siv_unimplemented++;
2450 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2451 || CF_NO_DEPENDENCE_P (*overlaps_b))
2452 dependence_stats.num_siv_independent++;
2453 else
2454 dependence_stats.num_siv_dependent++;
2455 }
2456 else
2457 goto siv_subscript_dontknow;
2458 }
2459
2460 else
2461 {
2462 siv_subscript_dontknow:;
2463 if (dump_file && (dump_flags & TDF_DETAILS))
2464 fprintf (dump_file, "siv test failed: unimplemented.\n");
2465 *overlaps_a = conflict_fn_not_known ();
2466 *overlaps_b = conflict_fn_not_known ();
2467 *last_conflicts = chrec_dont_know;
2468 dependence_stats.num_siv_unimplemented++;
2469 }
2470
2471 if (dump_file && (dump_flags & TDF_DETAILS))
2472 fprintf (dump_file, ")\n");
2473 }
2474
2475 /* Returns false if we can prove that the greatest common divisor of the steps
2476 of CHREC does not divide CST, false otherwise. */
2477
2478 static bool
2479 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2480 {
2481 HOST_WIDE_INT cd = 0, val;
2482 tree step;
2483
2484 if (!host_integerp (cst, 0))
2485 return true;
2486 val = tree_low_cst (cst, 0);
2487
2488 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2489 {
2490 step = CHREC_RIGHT (chrec);
2491 if (!host_integerp (step, 0))
2492 return true;
2493 cd = gcd (cd, tree_low_cst (step, 0));
2494 chrec = CHREC_LEFT (chrec);
2495 }
2496
2497 return val % cd == 0;
2498 }
2499
2500 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2501 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2502 functions that describe the relation between the elements accessed
2503 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2504 is verified:
2505
2506 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2507
2508 static void
2509 analyze_miv_subscript (tree chrec_a,
2510 tree chrec_b,
2511 conflict_function **overlaps_a,
2512 conflict_function **overlaps_b,
2513 tree *last_conflicts,
2514 struct loop *loop_nest)
2515 {
2516 /* FIXME: This is a MIV subscript, not yet handled.
2517 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2518 (A[i] vs. A[j]).
2519
2520 In the SIV test we had to solve a Diophantine equation with two
2521 variables. In the MIV case we have to solve a Diophantine
2522 equation with 2*n variables (if the subscript uses n IVs).
2523 */
2524 tree type, difference;
2525
2526 dependence_stats.num_miv++;
2527 if (dump_file && (dump_flags & TDF_DETAILS))
2528 fprintf (dump_file, "(analyze_miv_subscript \n");
2529
2530 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2531 chrec_a = chrec_convert (type, chrec_a, NULL);
2532 chrec_b = chrec_convert (type, chrec_b, NULL);
2533 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2534
2535 if (eq_evolutions_p (chrec_a, chrec_b))
2536 {
2537 /* Access functions are the same: all the elements are accessed
2538 in the same order. */
2539 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2540 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2541 *last_conflicts = estimated_loop_iterations_tree
2542 (get_chrec_loop (chrec_a), true);
2543 dependence_stats.num_miv_dependent++;
2544 }
2545
2546 else if (evolution_function_is_constant_p (difference)
2547 /* For the moment, the following is verified:
2548 evolution_function_is_affine_multivariate_p (chrec_a,
2549 loop_nest->num) */
2550 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2551 {
2552 /* testsuite/.../ssa-chrec-33.c
2553 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2554
2555 The difference is 1, and all the evolution steps are multiples
2556 of 2, consequently there are no overlapping elements. */
2557 *overlaps_a = conflict_fn_no_dependence ();
2558 *overlaps_b = conflict_fn_no_dependence ();
2559 *last_conflicts = integer_zero_node;
2560 dependence_stats.num_miv_independent++;
2561 }
2562
2563 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2564 && !chrec_contains_symbols (chrec_a)
2565 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2566 && !chrec_contains_symbols (chrec_b))
2567 {
2568 /* testsuite/.../ssa-chrec-35.c
2569 {0, +, 1}_2 vs. {0, +, 1}_3
2570 the overlapping elements are respectively located at iterations:
2571 {0, +, 1}_x and {0, +, 1}_x,
2572 in other words, we have the equality:
2573 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2574
2575 Other examples:
2576 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2577 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2578
2579 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2580 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2581 */
2582 analyze_subscript_affine_affine (chrec_a, chrec_b,
2583 overlaps_a, overlaps_b, last_conflicts);
2584
2585 if (CF_NOT_KNOWN_P (*overlaps_a)
2586 || CF_NOT_KNOWN_P (*overlaps_b))
2587 dependence_stats.num_miv_unimplemented++;
2588 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2589 || CF_NO_DEPENDENCE_P (*overlaps_b))
2590 dependence_stats.num_miv_independent++;
2591 else
2592 dependence_stats.num_miv_dependent++;
2593 }
2594
2595 else
2596 {
2597 /* When the analysis is too difficult, answer "don't know". */
2598 if (dump_file && (dump_flags & TDF_DETAILS))
2599 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2600
2601 *overlaps_a = conflict_fn_not_known ();
2602 *overlaps_b = conflict_fn_not_known ();
2603 *last_conflicts = chrec_dont_know;
2604 dependence_stats.num_miv_unimplemented++;
2605 }
2606
2607 if (dump_file && (dump_flags & TDF_DETAILS))
2608 fprintf (dump_file, ")\n");
2609 }
2610
2611 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2612 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2613 OVERLAP_ITERATIONS_B are initialized with two functions that
2614 describe the iterations that contain conflicting elements.
2615
2616 Remark: For an integer k >= 0, the following equality is true:
2617
2618 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2619 */
2620
2621 static void
2622 analyze_overlapping_iterations (tree chrec_a,
2623 tree chrec_b,
2624 conflict_function **overlap_iterations_a,
2625 conflict_function **overlap_iterations_b,
2626 tree *last_conflicts, struct loop *loop_nest)
2627 {
2628 unsigned int lnn = loop_nest->num;
2629
2630 dependence_stats.num_subscript_tests++;
2631
2632 if (dump_file && (dump_flags & TDF_DETAILS))
2633 {
2634 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2635 fprintf (dump_file, " (chrec_a = ");
2636 print_generic_expr (dump_file, chrec_a, 0);
2637 fprintf (dump_file, ")\n (chrec_b = ");
2638 print_generic_expr (dump_file, chrec_b, 0);
2639 fprintf (dump_file, ")\n");
2640 }
2641
2642 if (chrec_a == NULL_TREE
2643 || chrec_b == NULL_TREE
2644 || chrec_contains_undetermined (chrec_a)
2645 || chrec_contains_undetermined (chrec_b))
2646 {
2647 dependence_stats.num_subscript_undetermined++;
2648
2649 *overlap_iterations_a = conflict_fn_not_known ();
2650 *overlap_iterations_b = conflict_fn_not_known ();
2651 }
2652
2653 /* If they are the same chrec, and are affine, they overlap
2654 on every iteration. */
2655 else if (eq_evolutions_p (chrec_a, chrec_b)
2656 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2657 {
2658 dependence_stats.num_same_subscript_function++;
2659 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2660 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2661 *last_conflicts = chrec_dont_know;
2662 }
2663
2664 /* If they aren't the same, and aren't affine, we can't do anything
2665 yet. */
2666 else if ((chrec_contains_symbols (chrec_a)
2667 || chrec_contains_symbols (chrec_b))
2668 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2669 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2670 {
2671 dependence_stats.num_subscript_undetermined++;
2672 *overlap_iterations_a = conflict_fn_not_known ();
2673 *overlap_iterations_b = conflict_fn_not_known ();
2674 }
2675
2676 else if (ziv_subscript_p (chrec_a, chrec_b))
2677 analyze_ziv_subscript (chrec_a, chrec_b,
2678 overlap_iterations_a, overlap_iterations_b,
2679 last_conflicts);
2680
2681 else if (siv_subscript_p (chrec_a, chrec_b))
2682 analyze_siv_subscript (chrec_a, chrec_b,
2683 overlap_iterations_a, overlap_iterations_b,
2684 last_conflicts, lnn);
2685
2686 else
2687 analyze_miv_subscript (chrec_a, chrec_b,
2688 overlap_iterations_a, overlap_iterations_b,
2689 last_conflicts, loop_nest);
2690
2691 if (dump_file && (dump_flags & TDF_DETAILS))
2692 {
2693 fprintf (dump_file, " (overlap_iterations_a = ");
2694 dump_conflict_function (dump_file, *overlap_iterations_a);
2695 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2696 dump_conflict_function (dump_file, *overlap_iterations_b);
2697 fprintf (dump_file, ")\n");
2698 fprintf (dump_file, ")\n");
2699 }
2700 }
2701
2702 /* Helper function for uniquely inserting distance vectors. */
2703
2704 static void
2705 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2706 {
2707 unsigned i;
2708 lambda_vector v;
2709
2710 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2711 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2712 return;
2713
2714 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2715 }
2716
2717 /* Helper function for uniquely inserting direction vectors. */
2718
2719 static void
2720 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2721 {
2722 unsigned i;
2723 lambda_vector v;
2724
2725 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2726 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2727 return;
2728
2729 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2730 }
2731
2732 /* Add a distance of 1 on all the loops outer than INDEX. If we
2733 haven't yet determined a distance for this outer loop, push a new
2734 distance vector composed of the previous distance, and a distance
2735 of 1 for this outer loop. Example:
2736
2737 | loop_1
2738 | loop_2
2739 | A[10]
2740 | endloop_2
2741 | endloop_1
2742
2743 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2744 save (0, 1), then we have to save (1, 0). */
2745
2746 static void
2747 add_outer_distances (struct data_dependence_relation *ddr,
2748 lambda_vector dist_v, int index)
2749 {
2750 /* For each outer loop where init_v is not set, the accesses are
2751 in dependence of distance 1 in the loop. */
2752 while (--index >= 0)
2753 {
2754 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2755 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2756 save_v[index] = 1;
2757 save_dist_v (ddr, save_v);
2758 }
2759 }
2760
2761 /* Return false when fail to represent the data dependence as a
2762 distance vector. INIT_B is set to true when a component has been
2763 added to the distance vector DIST_V. INDEX_CARRY is then set to
2764 the index in DIST_V that carries the dependence. */
2765
2766 static bool
2767 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2768 struct data_reference *ddr_a,
2769 struct data_reference *ddr_b,
2770 lambda_vector dist_v, bool *init_b,
2771 int *index_carry)
2772 {
2773 unsigned i;
2774 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2775
2776 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2777 {
2778 tree access_fn_a, access_fn_b;
2779 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2780
2781 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2782 {
2783 non_affine_dependence_relation (ddr);
2784 return false;
2785 }
2786
2787 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2788 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2789
2790 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2791 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2792 {
2793 int dist, index;
2794 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2795 DDR_LOOP_NEST (ddr));
2796 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2797 DDR_LOOP_NEST (ddr));
2798
2799 /* The dependence is carried by the outermost loop. Example:
2800 | loop_1
2801 | A[{4, +, 1}_1]
2802 | loop_2
2803 | A[{5, +, 1}_2]
2804 | endloop_2
2805 | endloop_1
2806 In this case, the dependence is carried by loop_1. */
2807 index = index_a < index_b ? index_a : index_b;
2808 *index_carry = MIN (index, *index_carry);
2809
2810 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2811 {
2812 non_affine_dependence_relation (ddr);
2813 return false;
2814 }
2815
2816 dist = int_cst_value (SUB_DISTANCE (subscript));
2817
2818 /* This is the subscript coupling test. If we have already
2819 recorded a distance for this loop (a distance coming from
2820 another subscript), it should be the same. For example,
2821 in the following code, there is no dependence:
2822
2823 | loop i = 0, N, 1
2824 | T[i+1][i] = ...
2825 | ... = T[i][i]
2826 | endloop
2827 */
2828 if (init_v[index] != 0 && dist_v[index] != dist)
2829 {
2830 finalize_ddr_dependent (ddr, chrec_known);
2831 return false;
2832 }
2833
2834 dist_v[index] = dist;
2835 init_v[index] = 1;
2836 *init_b = true;
2837 }
2838 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2839 {
2840 /* This can be for example an affine vs. constant dependence
2841 (T[i] vs. T[3]) that is not an affine dependence and is
2842 not representable as a distance vector. */
2843 non_affine_dependence_relation (ddr);
2844 return false;
2845 }
2846 }
2847
2848 return true;
2849 }
2850
2851 /* Return true when the DDR contains only constant access functions. */
2852
2853 static bool
2854 constant_access_functions (const struct data_dependence_relation *ddr)
2855 {
2856 unsigned i;
2857
2858 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2859 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2860 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2861 return false;
2862
2863 return true;
2864 }
2865
2866 /* Helper function for the case where DDR_A and DDR_B are the same
2867 multivariate access function with a constant step. For an example
2868 see pr34635-1.c. */
2869
2870 static void
2871 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2872 {
2873 int x_1, x_2;
2874 tree c_1 = CHREC_LEFT (c_2);
2875 tree c_0 = CHREC_LEFT (c_1);
2876 lambda_vector dist_v;
2877 int v1, v2, cd;
2878
2879 /* Polynomials with more than 2 variables are not handled yet. When
2880 the evolution steps are parameters, it is not possible to
2881 represent the dependence using classical distance vectors. */
2882 if (TREE_CODE (c_0) != INTEGER_CST
2883 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2884 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2885 {
2886 DDR_AFFINE_P (ddr) = false;
2887 return;
2888 }
2889
2890 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2891 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2892
2893 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2894 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2895 v1 = int_cst_value (CHREC_RIGHT (c_1));
2896 v2 = int_cst_value (CHREC_RIGHT (c_2));
2897 cd = gcd (v1, v2);
2898 v1 /= cd;
2899 v2 /= cd;
2900
2901 if (v2 < 0)
2902 {
2903 v2 = -v2;
2904 v1 = -v1;
2905 }
2906
2907 dist_v[x_1] = v2;
2908 dist_v[x_2] = -v1;
2909 save_dist_v (ddr, dist_v);
2910
2911 add_outer_distances (ddr, dist_v, x_1);
2912 }
2913
2914 /* Helper function for the case where DDR_A and DDR_B are the same
2915 access functions. */
2916
2917 static void
2918 add_other_self_distances (struct data_dependence_relation *ddr)
2919 {
2920 lambda_vector dist_v;
2921 unsigned i;
2922 int index_carry = DDR_NB_LOOPS (ddr);
2923
2924 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2925 {
2926 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2927
2928 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2929 {
2930 if (!evolution_function_is_univariate_p (access_fun))
2931 {
2932 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2933 {
2934 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2935 return;
2936 }
2937
2938 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
2939
2940 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
2941 add_multivariate_self_dist (ddr, access_fun);
2942 else
2943 /* The evolution step is not constant: it varies in
2944 the outer loop, so this cannot be represented by a
2945 distance vector. For example in pr34635.c the
2946 evolution is {0, +, {0, +, 4}_1}_2. */
2947 DDR_AFFINE_P (ddr) = false;
2948
2949 return;
2950 }
2951
2952 index_carry = MIN (index_carry,
2953 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2954 DDR_LOOP_NEST (ddr)));
2955 }
2956 }
2957
2958 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2959 add_outer_distances (ddr, dist_v, index_carry);
2960 }
2961
2962 static void
2963 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2964 {
2965 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2966
2967 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2968 save_dist_v (ddr, dist_v);
2969 }
2970
2971 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2972 is the case for example when access functions are the same and
2973 equal to a constant, as in:
2974
2975 | loop_1
2976 | A[3] = ...
2977 | ... = A[3]
2978 | endloop_1
2979
2980 in which case the distance vectors are (0) and (1). */
2981
2982 static void
2983 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2984 {
2985 unsigned i, j;
2986
2987 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2988 {
2989 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2990 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2991 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2992
2993 for (j = 0; j < ca->n; j++)
2994 if (affine_function_zero_p (ca->fns[j]))
2995 {
2996 insert_innermost_unit_dist_vector (ddr);
2997 return;
2998 }
2999
3000 for (j = 0; j < cb->n; j++)
3001 if (affine_function_zero_p (cb->fns[j]))
3002 {
3003 insert_innermost_unit_dist_vector (ddr);
3004 return;
3005 }
3006 }
3007 }
3008
3009 /* Compute the classic per loop distance vector. DDR is the data
3010 dependence relation to build a vector from. Return false when fail
3011 to represent the data dependence as a distance vector. */
3012
3013 static bool
3014 build_classic_dist_vector (struct data_dependence_relation *ddr,
3015 struct loop *loop_nest)
3016 {
3017 bool init_b = false;
3018 int index_carry = DDR_NB_LOOPS (ddr);
3019 lambda_vector dist_v;
3020
3021 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3022 return false;
3023
3024 if (same_access_functions (ddr))
3025 {
3026 /* Save the 0 vector. */
3027 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3028 save_dist_v (ddr, dist_v);
3029
3030 if (constant_access_functions (ddr))
3031 add_distance_for_zero_overlaps (ddr);
3032
3033 if (DDR_NB_LOOPS (ddr) > 1)
3034 add_other_self_distances (ddr);
3035
3036 return true;
3037 }
3038
3039 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3040 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3041 dist_v, &init_b, &index_carry))
3042 return false;
3043
3044 /* Save the distance vector if we initialized one. */
3045 if (init_b)
3046 {
3047 /* Verify a basic constraint: classic distance vectors should
3048 always be lexicographically positive.
3049
3050 Data references are collected in the order of execution of
3051 the program, thus for the following loop
3052
3053 | for (i = 1; i < 100; i++)
3054 | for (j = 1; j < 100; j++)
3055 | {
3056 | t = T[j+1][i-1]; // A
3057 | T[j][i] = t + 2; // B
3058 | }
3059
3060 references are collected following the direction of the wind:
3061 A then B. The data dependence tests are performed also
3062 following this order, such that we're looking at the distance
3063 separating the elements accessed by A from the elements later
3064 accessed by B. But in this example, the distance returned by
3065 test_dep (A, B) is lexicographically negative (-1, 1), that
3066 means that the access A occurs later than B with respect to
3067 the outer loop, ie. we're actually looking upwind. In this
3068 case we solve test_dep (B, A) looking downwind to the
3069 lexicographically positive solution, that returns the
3070 distance vector (1, -1). */
3071 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3072 {
3073 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3074 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3075 loop_nest))
3076 return false;
3077 compute_subscript_distance (ddr);
3078 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3079 save_v, &init_b, &index_carry))
3080 return false;
3081 save_dist_v (ddr, save_v);
3082 DDR_REVERSED_P (ddr) = true;
3083
3084 /* In this case there is a dependence forward for all the
3085 outer loops:
3086
3087 | for (k = 1; k < 100; k++)
3088 | for (i = 1; i < 100; i++)
3089 | for (j = 1; j < 100; j++)
3090 | {
3091 | t = T[j+1][i-1]; // A
3092 | T[j][i] = t + 2; // B
3093 | }
3094
3095 the vectors are:
3096 (0, 1, -1)
3097 (1, 1, -1)
3098 (1, -1, 1)
3099 */
3100 if (DDR_NB_LOOPS (ddr) > 1)
3101 {
3102 add_outer_distances (ddr, save_v, index_carry);
3103 add_outer_distances (ddr, dist_v, index_carry);
3104 }
3105 }
3106 else
3107 {
3108 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3109 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3110
3111 if (DDR_NB_LOOPS (ddr) > 1)
3112 {
3113 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3114
3115 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3116 DDR_A (ddr), loop_nest))
3117 return false;
3118 compute_subscript_distance (ddr);
3119 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3120 opposite_v, &init_b,
3121 &index_carry))
3122 return false;
3123
3124 save_dist_v (ddr, save_v);
3125 add_outer_distances (ddr, dist_v, index_carry);
3126 add_outer_distances (ddr, opposite_v, index_carry);
3127 }
3128 else
3129 save_dist_v (ddr, save_v);
3130 }
3131 }
3132 else
3133 {
3134 /* There is a distance of 1 on all the outer loops: Example:
3135 there is a dependence of distance 1 on loop_1 for the array A.
3136
3137 | loop_1
3138 | A[5] = ...
3139 | endloop
3140 */
3141 add_outer_distances (ddr, dist_v,
3142 lambda_vector_first_nz (dist_v,
3143 DDR_NB_LOOPS (ddr), 0));
3144 }
3145
3146 if (dump_file && (dump_flags & TDF_DETAILS))
3147 {
3148 unsigned i;
3149
3150 fprintf (dump_file, "(build_classic_dist_vector\n");
3151 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3152 {
3153 fprintf (dump_file, " dist_vector = (");
3154 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3155 DDR_NB_LOOPS (ddr));
3156 fprintf (dump_file, " )\n");
3157 }
3158 fprintf (dump_file, ")\n");
3159 }
3160
3161 return true;
3162 }
3163
3164 /* Return the direction for a given distance.
3165 FIXME: Computing dir this way is suboptimal, since dir can catch
3166 cases that dist is unable to represent. */
3167
3168 static inline enum data_dependence_direction
3169 dir_from_dist (int dist)
3170 {
3171 if (dist > 0)
3172 return dir_positive;
3173 else if (dist < 0)
3174 return dir_negative;
3175 else
3176 return dir_equal;
3177 }
3178
3179 /* Compute the classic per loop direction vector. DDR is the data
3180 dependence relation to build a vector from. */
3181
3182 static void
3183 build_classic_dir_vector (struct data_dependence_relation *ddr)
3184 {
3185 unsigned i, j;
3186 lambda_vector dist_v;
3187
3188 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3189 {
3190 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3191
3192 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3193 dir_v[j] = dir_from_dist (dist_v[j]);
3194
3195 save_dir_v (ddr, dir_v);
3196 }
3197 }
3198
3199 /* Helper function. Returns true when there is a dependence between
3200 data references DRA and DRB. */
3201
3202 static bool
3203 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3204 struct data_reference *dra,
3205 struct data_reference *drb,
3206 struct loop *loop_nest)
3207 {
3208 unsigned int i;
3209 tree last_conflicts;
3210 struct subscript *subscript;
3211
3212 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3213 i++)
3214 {
3215 conflict_function *overlaps_a, *overlaps_b;
3216
3217 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3218 DR_ACCESS_FN (drb, i),
3219 &overlaps_a, &overlaps_b,
3220 &last_conflicts, loop_nest);
3221
3222 if (CF_NOT_KNOWN_P (overlaps_a)
3223 || CF_NOT_KNOWN_P (overlaps_b))
3224 {
3225 finalize_ddr_dependent (ddr, chrec_dont_know);
3226 dependence_stats.num_dependence_undetermined++;
3227 free_conflict_function (overlaps_a);
3228 free_conflict_function (overlaps_b);
3229 return false;
3230 }
3231
3232 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3233 || CF_NO_DEPENDENCE_P (overlaps_b))
3234 {
3235 finalize_ddr_dependent (ddr, chrec_known);
3236 dependence_stats.num_dependence_independent++;
3237 free_conflict_function (overlaps_a);
3238 free_conflict_function (overlaps_b);
3239 return false;
3240 }
3241
3242 else
3243 {
3244 if (SUB_CONFLICTS_IN_A (subscript))
3245 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3246 if (SUB_CONFLICTS_IN_B (subscript))
3247 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3248
3249 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3250 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3251 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3252 }
3253 }
3254
3255 return true;
3256 }
3257
3258 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3259
3260 static void
3261 subscript_dependence_tester (struct data_dependence_relation *ddr,
3262 struct loop *loop_nest)
3263 {
3264
3265 if (dump_file && (dump_flags & TDF_DETAILS))
3266 fprintf (dump_file, "(subscript_dependence_tester \n");
3267
3268 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3269 dependence_stats.num_dependence_dependent++;
3270
3271 compute_subscript_distance (ddr);
3272 if (build_classic_dist_vector (ddr, loop_nest))
3273 build_classic_dir_vector (ddr);
3274
3275 if (dump_file && (dump_flags & TDF_DETAILS))
3276 fprintf (dump_file, ")\n");
3277 }
3278
3279 /* Returns true when all the access functions of A are affine or
3280 constant with respect to LOOP_NEST. */
3281
3282 static bool
3283 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3284 const struct loop *loop_nest)
3285 {
3286 unsigned int i;
3287 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3288 tree t;
3289
3290 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3291 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3292 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3293 return false;
3294
3295 return true;
3296 }
3297
3298 /* Return true if we can create an affine data-ref for OP in STMT. */
3299
3300 bool
3301 stmt_simple_memref_p (struct loop *loop, gimple stmt, tree op)
3302 {
3303 data_reference_p dr;
3304 bool res = true;
3305
3306 dr = create_data_ref (loop, op, stmt, true);
3307 if (!access_functions_are_affine_or_constant_p (dr, loop))
3308 res = false;
3309
3310 free_data_ref (dr);
3311 return res;
3312 }
3313
3314 /* Initializes an equation for an OMEGA problem using the information
3315 contained in the ACCESS_FUN. Returns true when the operation
3316 succeeded.
3317
3318 PB is the omega constraint system.
3319 EQ is the number of the equation to be initialized.
3320 OFFSET is used for shifting the variables names in the constraints:
3321 a constrain is composed of 2 * the number of variables surrounding
3322 dependence accesses. OFFSET is set either to 0 for the first n variables,
3323 then it is set to n.
3324 ACCESS_FUN is expected to be an affine chrec. */
3325
3326 static bool
3327 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3328 unsigned int offset, tree access_fun,
3329 struct data_dependence_relation *ddr)
3330 {
3331 switch (TREE_CODE (access_fun))
3332 {
3333 case POLYNOMIAL_CHREC:
3334 {
3335 tree left = CHREC_LEFT (access_fun);
3336 tree right = CHREC_RIGHT (access_fun);
3337 int var = CHREC_VARIABLE (access_fun);
3338 unsigned var_idx;
3339
3340 if (TREE_CODE (right) != INTEGER_CST)
3341 return false;
3342
3343 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3344 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3345
3346 /* Compute the innermost loop index. */
3347 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3348
3349 if (offset == 0)
3350 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3351 += int_cst_value (right);
3352
3353 switch (TREE_CODE (left))
3354 {
3355 case POLYNOMIAL_CHREC:
3356 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3357
3358 case INTEGER_CST:
3359 pb->eqs[eq].coef[0] += int_cst_value (left);
3360 return true;
3361
3362 default:
3363 return false;
3364 }
3365 }
3366
3367 case INTEGER_CST:
3368 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3369 return true;
3370
3371 default:
3372 return false;
3373 }
3374 }
3375
3376 /* As explained in the comments preceding init_omega_for_ddr, we have
3377 to set up a system for each loop level, setting outer loops
3378 variation to zero, and current loop variation to positive or zero.
3379 Save each lexico positive distance vector. */
3380
3381 static void
3382 omega_extract_distance_vectors (omega_pb pb,
3383 struct data_dependence_relation *ddr)
3384 {
3385 int eq, geq;
3386 unsigned i, j;
3387 struct loop *loopi, *loopj;
3388 enum omega_result res;
3389
3390 /* Set a new problem for each loop in the nest. The basis is the
3391 problem that we have initialized until now. On top of this we
3392 add new constraints. */
3393 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3394 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3395 {
3396 int dist = 0;
3397 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3398 DDR_NB_LOOPS (ddr));
3399
3400 omega_copy_problem (copy, pb);
3401
3402 /* For all the outer loops "loop_j", add "dj = 0". */
3403 for (j = 0;
3404 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3405 {
3406 eq = omega_add_zero_eq (copy, omega_black);
3407 copy->eqs[eq].coef[j + 1] = 1;
3408 }
3409
3410 /* For "loop_i", add "0 <= di". */
3411 geq = omega_add_zero_geq (copy, omega_black);
3412 copy->geqs[geq].coef[i + 1] = 1;
3413
3414 /* Reduce the constraint system, and test that the current
3415 problem is feasible. */
3416 res = omega_simplify_problem (copy);
3417 if (res == omega_false
3418 || res == omega_unknown
3419 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3420 goto next_problem;
3421
3422 for (eq = 0; eq < copy->num_subs; eq++)
3423 if (copy->subs[eq].key == (int) i + 1)
3424 {
3425 dist = copy->subs[eq].coef[0];
3426 goto found_dist;
3427 }
3428
3429 if (dist == 0)
3430 {
3431 /* Reinitialize problem... */
3432 omega_copy_problem (copy, pb);
3433 for (j = 0;
3434 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3435 {
3436 eq = omega_add_zero_eq (copy, omega_black);
3437 copy->eqs[eq].coef[j + 1] = 1;
3438 }
3439
3440 /* ..., but this time "di = 1". */
3441 eq = omega_add_zero_eq (copy, omega_black);
3442 copy->eqs[eq].coef[i + 1] = 1;
3443 copy->eqs[eq].coef[0] = -1;
3444
3445 res = omega_simplify_problem (copy);
3446 if (res == omega_false
3447 || res == omega_unknown
3448 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3449 goto next_problem;
3450
3451 for (eq = 0; eq < copy->num_subs; eq++)
3452 if (copy->subs[eq].key == (int) i + 1)
3453 {
3454 dist = copy->subs[eq].coef[0];
3455 goto found_dist;
3456 }
3457 }
3458
3459 found_dist:;
3460 /* Save the lexicographically positive distance vector. */
3461 if (dist >= 0)
3462 {
3463 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3464 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3465
3466 dist_v[i] = dist;
3467
3468 for (eq = 0; eq < copy->num_subs; eq++)
3469 if (copy->subs[eq].key > 0)
3470 {
3471 dist = copy->subs[eq].coef[0];
3472 dist_v[copy->subs[eq].key - 1] = dist;
3473 }
3474
3475 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3476 dir_v[j] = dir_from_dist (dist_v[j]);
3477
3478 save_dist_v (ddr, dist_v);
3479 save_dir_v (ddr, dir_v);
3480 }
3481
3482 next_problem:;
3483 omega_free_problem (copy);
3484 }
3485 }
3486
3487 /* This is called for each subscript of a tuple of data references:
3488 insert an equality for representing the conflicts. */
3489
3490 static bool
3491 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3492 struct data_dependence_relation *ddr,
3493 omega_pb pb, bool *maybe_dependent)
3494 {
3495 int eq;
3496 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3497 TREE_TYPE (access_fun_b));
3498 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3499 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3500 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3501
3502 /* When the fun_a - fun_b is not constant, the dependence is not
3503 captured by the classic distance vector representation. */
3504 if (TREE_CODE (difference) != INTEGER_CST)
3505 return false;
3506
3507 /* ZIV test. */
3508 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3509 {
3510 /* There is no dependence. */
3511 *maybe_dependent = false;
3512 return true;
3513 }
3514
3515 fun_b = chrec_fold_multiply (type, fun_b, integer_minus_one_node);
3516
3517 eq = omega_add_zero_eq (pb, omega_black);
3518 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3519 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3520 /* There is probably a dependence, but the system of
3521 constraints cannot be built: answer "don't know". */
3522 return false;
3523
3524 /* GCD test. */
3525 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3526 && !int_divides_p (lambda_vector_gcd
3527 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3528 2 * DDR_NB_LOOPS (ddr)),
3529 pb->eqs[eq].coef[0]))
3530 {
3531 /* There is no dependence. */
3532 *maybe_dependent = false;
3533 return true;
3534 }
3535
3536 return true;
3537 }
3538
3539 /* Helper function, same as init_omega_for_ddr but specialized for
3540 data references A and B. */
3541
3542 static bool
3543 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3544 struct data_dependence_relation *ddr,
3545 omega_pb pb, bool *maybe_dependent)
3546 {
3547 unsigned i;
3548 int ineq;
3549 struct loop *loopi;
3550 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3551
3552 /* Insert an equality per subscript. */
3553 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3554 {
3555 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3556 ddr, pb, maybe_dependent))
3557 return false;
3558 else if (*maybe_dependent == false)
3559 {
3560 /* There is no dependence. */
3561 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3562 return true;
3563 }
3564 }
3565
3566 /* Insert inequalities: constraints corresponding to the iteration
3567 domain, i.e. the loops surrounding the references "loop_x" and
3568 the distance variables "dx". The layout of the OMEGA
3569 representation is as follows:
3570 - coef[0] is the constant
3571 - coef[1..nb_loops] are the protected variables that will not be
3572 removed by the solver: the "dx"
3573 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3574 */
3575 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3576 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3577 {
3578 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3579
3580 /* 0 <= loop_x */
3581 ineq = omega_add_zero_geq (pb, omega_black);
3582 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3583
3584 /* 0 <= loop_x + dx */
3585 ineq = omega_add_zero_geq (pb, omega_black);
3586 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3587 pb->geqs[ineq].coef[i + 1] = 1;
3588
3589 if (nbi != -1)
3590 {
3591 /* loop_x <= nb_iters */
3592 ineq = omega_add_zero_geq (pb, omega_black);
3593 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3594 pb->geqs[ineq].coef[0] = nbi;
3595
3596 /* loop_x + dx <= nb_iters */
3597 ineq = omega_add_zero_geq (pb, omega_black);
3598 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3599 pb->geqs[ineq].coef[i + 1] = -1;
3600 pb->geqs[ineq].coef[0] = nbi;
3601
3602 /* A step "dx" bigger than nb_iters is not feasible, so
3603 add "0 <= nb_iters + dx", */
3604 ineq = omega_add_zero_geq (pb, omega_black);
3605 pb->geqs[ineq].coef[i + 1] = 1;
3606 pb->geqs[ineq].coef[0] = nbi;
3607 /* and "dx <= nb_iters". */
3608 ineq = omega_add_zero_geq (pb, omega_black);
3609 pb->geqs[ineq].coef[i + 1] = -1;
3610 pb->geqs[ineq].coef[0] = nbi;
3611 }
3612 }
3613
3614 omega_extract_distance_vectors (pb, ddr);
3615
3616 return true;
3617 }
3618
3619 /* Sets up the Omega dependence problem for the data dependence
3620 relation DDR. Returns false when the constraint system cannot be
3621 built, ie. when the test answers "don't know". Returns true
3622 otherwise, and when independence has been proved (using one of the
3623 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3624 set MAYBE_DEPENDENT to true.
3625
3626 Example: for setting up the dependence system corresponding to the
3627 conflicting accesses
3628
3629 | loop_i
3630 | loop_j
3631 | A[i, i+1] = ...
3632 | ... A[2*j, 2*(i + j)]
3633 | endloop_j
3634 | endloop_i
3635
3636 the following constraints come from the iteration domain:
3637
3638 0 <= i <= Ni
3639 0 <= i + di <= Ni
3640 0 <= j <= Nj
3641 0 <= j + dj <= Nj
3642
3643 where di, dj are the distance variables. The constraints
3644 representing the conflicting elements are:
3645
3646 i = 2 * (j + dj)
3647 i + 1 = 2 * (i + di + j + dj)
3648
3649 For asking that the resulting distance vector (di, dj) be
3650 lexicographically positive, we insert the constraint "di >= 0". If
3651 "di = 0" in the solution, we fix that component to zero, and we
3652 look at the inner loops: we set a new problem where all the outer
3653 loop distances are zero, and fix this inner component to be
3654 positive. When one of the components is positive, we save that
3655 distance, and set a new problem where the distance on this loop is
3656 zero, searching for other distances in the inner loops. Here is
3657 the classic example that illustrates that we have to set for each
3658 inner loop a new problem:
3659
3660 | loop_1
3661 | loop_2
3662 | A[10]
3663 | endloop_2
3664 | endloop_1
3665
3666 we have to save two distances (1, 0) and (0, 1).
3667
3668 Given two array references, refA and refB, we have to set the
3669 dependence problem twice, refA vs. refB and refB vs. refA, and we
3670 cannot do a single test, as refB might occur before refA in the
3671 inner loops, and the contrary when considering outer loops: ex.
3672
3673 | loop_0
3674 | loop_1
3675 | loop_2
3676 | T[{1,+,1}_2][{1,+,1}_1] // refA
3677 | T[{2,+,1}_2][{0,+,1}_1] // refB
3678 | endloop_2
3679 | endloop_1
3680 | endloop_0
3681
3682 refB touches the elements in T before refA, and thus for the same
3683 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3684 but for successive loop_0 iterations, we have (1, -1, 1)
3685
3686 The Omega solver expects the distance variables ("di" in the
3687 previous example) to come first in the constraint system (as
3688 variables to be protected, or "safe" variables), the constraint
3689 system is built using the following layout:
3690
3691 "cst | distance vars | index vars".
3692 */
3693
3694 static bool
3695 init_omega_for_ddr (struct data_dependence_relation *ddr,
3696 bool *maybe_dependent)
3697 {
3698 omega_pb pb;
3699 bool res = false;
3700
3701 *maybe_dependent = true;
3702
3703 if (same_access_functions (ddr))
3704 {
3705 unsigned j;
3706 lambda_vector dir_v;
3707
3708 /* Save the 0 vector. */
3709 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3710 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3711 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3712 dir_v[j] = dir_equal;
3713 save_dir_v (ddr, dir_v);
3714
3715 /* Save the dependences carried by outer loops. */
3716 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3717 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3718 maybe_dependent);
3719 omega_free_problem (pb);
3720 return res;
3721 }
3722
3723 /* Omega expects the protected variables (those that have to be kept
3724 after elimination) to appear first in the constraint system.
3725 These variables are the distance variables. In the following
3726 initialization we declare NB_LOOPS safe variables, and the total
3727 number of variables for the constraint system is 2*NB_LOOPS. */
3728 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3729 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3730 maybe_dependent);
3731 omega_free_problem (pb);
3732
3733 /* Stop computation if not decidable, or no dependence. */
3734 if (res == false || *maybe_dependent == false)
3735 return res;
3736
3737 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3738 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3739 maybe_dependent);
3740 omega_free_problem (pb);
3741
3742 return res;
3743 }
3744
3745 /* Return true when DDR contains the same information as that stored
3746 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3747
3748 static bool
3749 ddr_consistent_p (FILE *file,
3750 struct data_dependence_relation *ddr,
3751 VEC (lambda_vector, heap) *dist_vects,
3752 VEC (lambda_vector, heap) *dir_vects)
3753 {
3754 unsigned int i, j;
3755
3756 /* If dump_file is set, output there. */
3757 if (dump_file && (dump_flags & TDF_DETAILS))
3758 file = dump_file;
3759
3760 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3761 {
3762 lambda_vector b_dist_v;
3763 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3764 VEC_length (lambda_vector, dist_vects),
3765 DDR_NUM_DIST_VECTS (ddr));
3766
3767 fprintf (file, "Banerjee dist vectors:\n");
3768 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3769 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3770
3771 fprintf (file, "Omega dist vectors:\n");
3772 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3773 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3774
3775 fprintf (file, "data dependence relation:\n");
3776 dump_data_dependence_relation (file, ddr);
3777
3778 fprintf (file, ")\n");
3779 return false;
3780 }
3781
3782 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3783 {
3784 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3785 VEC_length (lambda_vector, dir_vects),
3786 DDR_NUM_DIR_VECTS (ddr));
3787 return false;
3788 }
3789
3790 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3791 {
3792 lambda_vector a_dist_v;
3793 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3794
3795 /* Distance vectors are not ordered in the same way in the DDR
3796 and in the DIST_VECTS: search for a matching vector. */
3797 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3798 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3799 break;
3800
3801 if (j == VEC_length (lambda_vector, dist_vects))
3802 {
3803 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3804 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3805 fprintf (file, "not found in Omega dist vectors:\n");
3806 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3807 fprintf (file, "data dependence relation:\n");
3808 dump_data_dependence_relation (file, ddr);
3809 fprintf (file, ")\n");
3810 }
3811 }
3812
3813 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3814 {
3815 lambda_vector a_dir_v;
3816 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3817
3818 /* Direction vectors are not ordered in the same way in the DDR
3819 and in the DIR_VECTS: search for a matching vector. */
3820 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3821 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3822 break;
3823
3824 if (j == VEC_length (lambda_vector, dist_vects))
3825 {
3826 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3827 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3828 fprintf (file, "not found in Omega dir vectors:\n");
3829 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3830 fprintf (file, "data dependence relation:\n");
3831 dump_data_dependence_relation (file, ddr);
3832 fprintf (file, ")\n");
3833 }
3834 }
3835
3836 return true;
3837 }
3838
3839 /* This computes the affine dependence relation between A and B with
3840 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3841 independence between two accesses, while CHREC_DONT_KNOW is used
3842 for representing the unknown relation.
3843
3844 Note that it is possible to stop the computation of the dependence
3845 relation the first time we detect a CHREC_KNOWN element for a given
3846 subscript. */
3847
3848 static void
3849 compute_affine_dependence (struct data_dependence_relation *ddr,
3850 struct loop *loop_nest)
3851 {
3852 struct data_reference *dra = DDR_A (ddr);
3853 struct data_reference *drb = DDR_B (ddr);
3854
3855 if (dump_file && (dump_flags & TDF_DETAILS))
3856 {
3857 fprintf (dump_file, "(compute_affine_dependence\n");
3858 fprintf (dump_file, " (stmt_a = \n");
3859 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3860 fprintf (dump_file, ")\n (stmt_b = \n");
3861 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3862 fprintf (dump_file, ")\n");
3863 }
3864
3865 /* Analyze only when the dependence relation is not yet known. */
3866 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3867 && !DDR_SELF_REFERENCE (ddr))
3868 {
3869 dependence_stats.num_dependence_tests++;
3870
3871 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3872 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3873 {
3874 if (flag_check_data_deps)
3875 {
3876 /* Compute the dependences using the first algorithm. */
3877 subscript_dependence_tester (ddr, loop_nest);
3878
3879 if (dump_file && (dump_flags & TDF_DETAILS))
3880 {
3881 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3882 dump_data_dependence_relation (dump_file, ddr);
3883 }
3884
3885 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3886 {
3887 bool maybe_dependent;
3888 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3889
3890 /* Save the result of the first DD analyzer. */
3891 dist_vects = DDR_DIST_VECTS (ddr);
3892 dir_vects = DDR_DIR_VECTS (ddr);
3893
3894 /* Reset the information. */
3895 DDR_DIST_VECTS (ddr) = NULL;
3896 DDR_DIR_VECTS (ddr) = NULL;
3897
3898 /* Compute the same information using Omega. */
3899 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3900 goto csys_dont_know;
3901
3902 if (dump_file && (dump_flags & TDF_DETAILS))
3903 {
3904 fprintf (dump_file, "Omega Analyzer\n");
3905 dump_data_dependence_relation (dump_file, ddr);
3906 }
3907
3908 /* Check that we get the same information. */
3909 if (maybe_dependent)
3910 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3911 dir_vects));
3912 }
3913 }
3914 else
3915 subscript_dependence_tester (ddr, loop_nest);
3916 }
3917
3918 /* As a last case, if the dependence cannot be determined, or if
3919 the dependence is considered too difficult to determine, answer
3920 "don't know". */
3921 else
3922 {
3923 csys_dont_know:;
3924 dependence_stats.num_dependence_undetermined++;
3925
3926 if (dump_file && (dump_flags & TDF_DETAILS))
3927 {
3928 fprintf (dump_file, "Data ref a:\n");
3929 dump_data_reference (dump_file, dra);
3930 fprintf (dump_file, "Data ref b:\n");
3931 dump_data_reference (dump_file, drb);
3932 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3933 }
3934 finalize_ddr_dependent (ddr, chrec_dont_know);
3935 }
3936 }
3937
3938 if (dump_file && (dump_flags & TDF_DETAILS))
3939 fprintf (dump_file, ")\n");
3940 }
3941
3942 /* This computes the dependence relation for the same data
3943 reference into DDR. */
3944
3945 static void
3946 compute_self_dependence (struct data_dependence_relation *ddr)
3947 {
3948 unsigned int i;
3949 struct subscript *subscript;
3950
3951 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3952 return;
3953
3954 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3955 i++)
3956 {
3957 if (SUB_CONFLICTS_IN_A (subscript))
3958 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3959 if (SUB_CONFLICTS_IN_B (subscript))
3960 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3961
3962 /* The accessed index overlaps for each iteration. */
3963 SUB_CONFLICTS_IN_A (subscript)
3964 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3965 SUB_CONFLICTS_IN_B (subscript)
3966 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3967 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3968 }
3969
3970 /* The distance vector is the zero vector. */
3971 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3972 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3973 }
3974
3975 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3976 the data references in DATAREFS, in the LOOP_NEST. When
3977 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3978 relations. */
3979
3980 void
3981 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3982 VEC (ddr_p, heap) **dependence_relations,
3983 VEC (loop_p, heap) *loop_nest,
3984 bool compute_self_and_rr)
3985 {
3986 struct data_dependence_relation *ddr;
3987 struct data_reference *a, *b;
3988 unsigned int i, j;
3989
3990 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3991 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3992 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
3993 {
3994 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3995 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3996 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
3997 }
3998
3999 if (compute_self_and_rr)
4000 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
4001 {
4002 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4003 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4004 compute_self_dependence (ddr);
4005 }
4006 }
4007
4008 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4009 true if STMT clobbers memory, false otherwise. */
4010
4011 bool
4012 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4013 {
4014 bool clobbers_memory = false;
4015 data_ref_loc *ref;
4016 tree *op0, *op1;
4017 enum gimple_code stmt_code = gimple_code (stmt);
4018
4019 *references = NULL;
4020
4021 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4022 Calls have side-effects, except those to const or pure
4023 functions. */
4024 if ((stmt_code == GIMPLE_CALL
4025 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4026 || (stmt_code == GIMPLE_ASM
4027 && gimple_asm_volatile_p (stmt)))
4028 clobbers_memory = true;
4029
4030 if (!gimple_vuse (stmt))
4031 return clobbers_memory;
4032
4033 if (stmt_code == GIMPLE_ASSIGN)
4034 {
4035 tree base;
4036 op0 = gimple_assign_lhs_ptr (stmt);
4037 op1 = gimple_assign_rhs1_ptr (stmt);
4038
4039 if (DECL_P (*op1)
4040 || (REFERENCE_CLASS_P (*op1)
4041 && (base = get_base_address (*op1))
4042 && TREE_CODE (base) != SSA_NAME))
4043 {
4044 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4045 ref->pos = op1;
4046 ref->is_read = true;
4047 }
4048
4049 if (DECL_P (*op0)
4050 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4051 {
4052 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4053 ref->pos = op0;
4054 ref->is_read = false;
4055 }
4056 }
4057 else if (stmt_code == GIMPLE_CALL)
4058 {
4059 unsigned i, n = gimple_call_num_args (stmt);
4060
4061 for (i = 0; i < n; i++)
4062 {
4063 op0 = gimple_call_arg_ptr (stmt, i);
4064
4065 if (DECL_P (*op0)
4066 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4067 {
4068 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4069 ref->pos = op0;
4070 ref->is_read = true;
4071 }
4072 }
4073 }
4074
4075 return clobbers_memory;
4076 }
4077
4078 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4079 reference, returns false, otherwise returns true. NEST is the outermost
4080 loop of the loop nest in which the references should be analyzed. */
4081
4082 bool
4083 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4084 VEC (data_reference_p, heap) **datarefs)
4085 {
4086 unsigned i;
4087 VEC (data_ref_loc, heap) *references;
4088 data_ref_loc *ref;
4089 bool ret = true;
4090 data_reference_p dr;
4091
4092 if (get_references_in_stmt (stmt, &references))
4093 {
4094 VEC_free (data_ref_loc, heap, references);
4095 return false;
4096 }
4097
4098 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
4099 {
4100 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4101 gcc_assert (dr != NULL);
4102
4103 /* FIXME -- data dependence analysis does not work correctly for objects with
4104 invariant addresses. Let us fail here until the problem is fixed. */
4105 if (dr_address_invariant_p (dr))
4106 {
4107 free_data_ref (dr);
4108 if (dump_file && (dump_flags & TDF_DETAILS))
4109 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4110 ret = false;
4111 break;
4112 }
4113
4114 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4115 }
4116 VEC_free (data_ref_loc, heap, references);
4117 return ret;
4118 }
4119
4120 /* Search the data references in LOOP, and record the information into
4121 DATAREFS. Returns chrec_dont_know when failing to analyze a
4122 difficult case, returns NULL_TREE otherwise.
4123
4124 TODO: This function should be made smarter so that it can handle address
4125 arithmetic as if they were array accesses, etc. */
4126
4127 tree
4128 find_data_references_in_loop (struct loop *loop,
4129 VEC (data_reference_p, heap) **datarefs)
4130 {
4131 basic_block bb, *bbs;
4132 unsigned int i;
4133 gimple_stmt_iterator bsi;
4134
4135 bbs = get_loop_body_in_dom_order (loop);
4136
4137 for (i = 0; i < loop->num_nodes; i++)
4138 {
4139 bb = bbs[i];
4140
4141 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4142 {
4143 gimple stmt = gsi_stmt (bsi);
4144
4145 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4146 {
4147 struct data_reference *res;
4148 res = XCNEW (struct data_reference);
4149 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4150
4151 free (bbs);
4152 return chrec_dont_know;
4153 }
4154 }
4155 }
4156 free (bbs);
4157
4158 return NULL_TREE;
4159 }
4160
4161 /* Recursive helper function. */
4162
4163 static bool
4164 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4165 {
4166 /* Inner loops of the nest should not contain siblings. Example:
4167 when there are two consecutive loops,
4168
4169 | loop_0
4170 | loop_1
4171 | A[{0, +, 1}_1]
4172 | endloop_1
4173 | loop_2
4174 | A[{0, +, 1}_2]
4175 | endloop_2
4176 | endloop_0
4177
4178 the dependence relation cannot be captured by the distance
4179 abstraction. */
4180 if (loop->next)
4181 return false;
4182
4183 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4184 if (loop->inner)
4185 return find_loop_nest_1 (loop->inner, loop_nest);
4186 return true;
4187 }
4188
4189 /* Return false when the LOOP is not well nested. Otherwise return
4190 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4191 contain the loops from the outermost to the innermost, as they will
4192 appear in the classic distance vector. */
4193
4194 bool
4195 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4196 {
4197 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4198 if (loop->inner)
4199 return find_loop_nest_1 (loop->inner, loop_nest);
4200 return true;
4201 }
4202
4203 /* Returns true when the data dependences have been computed, false otherwise.
4204 Given a loop nest LOOP, the following vectors are returned:
4205 DATAREFS is initialized to all the array elements contained in this loop,
4206 DEPENDENCE_RELATIONS contains the relations between the data references.
4207 Compute read-read and self relations if
4208 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4209
4210 bool
4211 compute_data_dependences_for_loop (struct loop *loop,
4212 bool compute_self_and_read_read_dependences,
4213 VEC (data_reference_p, heap) **datarefs,
4214 VEC (ddr_p, heap) **dependence_relations)
4215 {
4216 bool res = true;
4217 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4218
4219 memset (&dependence_stats, 0, sizeof (dependence_stats));
4220
4221 /* If the loop nest is not well formed, or one of the data references
4222 is not computable, give up without spending time to compute other
4223 dependences. */
4224 if (!loop
4225 || !find_loop_nest (loop, &vloops)
4226 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4227 {
4228 struct data_dependence_relation *ddr;
4229
4230 /* Insert a single relation into dependence_relations:
4231 chrec_dont_know. */
4232 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4233 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4234 res = false;
4235 }
4236 else
4237 compute_all_dependences (*datarefs, dependence_relations, vloops,
4238 compute_self_and_read_read_dependences);
4239
4240 if (dump_file && (dump_flags & TDF_STATS))
4241 {
4242 fprintf (dump_file, "Dependence tester statistics:\n");
4243
4244 fprintf (dump_file, "Number of dependence tests: %d\n",
4245 dependence_stats.num_dependence_tests);
4246 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4247 dependence_stats.num_dependence_dependent);
4248 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4249 dependence_stats.num_dependence_independent);
4250 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4251 dependence_stats.num_dependence_undetermined);
4252
4253 fprintf (dump_file, "Number of subscript tests: %d\n",
4254 dependence_stats.num_subscript_tests);
4255 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4256 dependence_stats.num_subscript_undetermined);
4257 fprintf (dump_file, "Number of same subscript function: %d\n",
4258 dependence_stats.num_same_subscript_function);
4259
4260 fprintf (dump_file, "Number of ziv tests: %d\n",
4261 dependence_stats.num_ziv);
4262 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4263 dependence_stats.num_ziv_dependent);
4264 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4265 dependence_stats.num_ziv_independent);
4266 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4267 dependence_stats.num_ziv_unimplemented);
4268
4269 fprintf (dump_file, "Number of siv tests: %d\n",
4270 dependence_stats.num_siv);
4271 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4272 dependence_stats.num_siv_dependent);
4273 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4274 dependence_stats.num_siv_independent);
4275 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4276 dependence_stats.num_siv_unimplemented);
4277
4278 fprintf (dump_file, "Number of miv tests: %d\n",
4279 dependence_stats.num_miv);
4280 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4281 dependence_stats.num_miv_dependent);
4282 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4283 dependence_stats.num_miv_independent);
4284 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4285 dependence_stats.num_miv_unimplemented);
4286 }
4287
4288 return res;
4289 }
4290
4291 /* Entry point (for testing only). Analyze all the data references
4292 and the dependence relations in LOOP.
4293
4294 The data references are computed first.
4295
4296 A relation on these nodes is represented by a complete graph. Some
4297 of the relations could be of no interest, thus the relations can be
4298 computed on demand.
4299
4300 In the following function we compute all the relations. This is
4301 just a first implementation that is here for:
4302 - for showing how to ask for the dependence relations,
4303 - for the debugging the whole dependence graph,
4304 - for the dejagnu testcases and maintenance.
4305
4306 It is possible to ask only for a part of the graph, avoiding to
4307 compute the whole dependence graph. The computed dependences are
4308 stored in a knowledge base (KB) such that later queries don't
4309 recompute the same information. The implementation of this KB is
4310 transparent to the optimizer, and thus the KB can be changed with a
4311 more efficient implementation, or the KB could be disabled. */
4312 static void
4313 analyze_all_data_dependences (struct loop *loop)
4314 {
4315 unsigned int i;
4316 int nb_data_refs = 10;
4317 VEC (data_reference_p, heap) *datarefs =
4318 VEC_alloc (data_reference_p, heap, nb_data_refs);
4319 VEC (ddr_p, heap) *dependence_relations =
4320 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4321
4322 /* Compute DDs on the whole function. */
4323 compute_data_dependences_for_loop (loop, false, &datarefs,
4324 &dependence_relations);
4325
4326 if (dump_file)
4327 {
4328 dump_data_dependence_relations (dump_file, dependence_relations);
4329 fprintf (dump_file, "\n\n");
4330
4331 if (dump_flags & TDF_DETAILS)
4332 dump_dist_dir_vectors (dump_file, dependence_relations);
4333
4334 if (dump_flags & TDF_STATS)
4335 {
4336 unsigned nb_top_relations = 0;
4337 unsigned nb_bot_relations = 0;
4338 unsigned nb_chrec_relations = 0;
4339 struct data_dependence_relation *ddr;
4340
4341 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4342 {
4343 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4344 nb_top_relations++;
4345
4346 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4347 nb_bot_relations++;
4348
4349 else
4350 nb_chrec_relations++;
4351 }
4352
4353 gather_stats_on_scev_database ();
4354 }
4355 }
4356
4357 free_dependence_relations (dependence_relations);
4358 free_data_refs (datarefs);
4359 }
4360
4361 /* Computes all the data dependences and check that the results of
4362 several analyzers are the same. */
4363
4364 void
4365 tree_check_data_deps (void)
4366 {
4367 loop_iterator li;
4368 struct loop *loop_nest;
4369
4370 FOR_EACH_LOOP (li, loop_nest, 0)
4371 analyze_all_data_dependences (loop_nest);
4372 }
4373
4374 /* Free the memory used by a data dependence relation DDR. */
4375
4376 void
4377 free_dependence_relation (struct data_dependence_relation *ddr)
4378 {
4379 if (ddr == NULL)
4380 return;
4381
4382 if (DDR_SUBSCRIPTS (ddr))
4383 free_subscripts (DDR_SUBSCRIPTS (ddr));
4384 if (DDR_DIST_VECTS (ddr))
4385 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4386 if (DDR_DIR_VECTS (ddr))
4387 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4388
4389 free (ddr);
4390 }
4391
4392 /* Free the memory used by the data dependence relations from
4393 DEPENDENCE_RELATIONS. */
4394
4395 void
4396 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4397 {
4398 unsigned int i;
4399 struct data_dependence_relation *ddr;
4400 VEC (loop_p, heap) *loop_nest = NULL;
4401
4402 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4403 {
4404 if (ddr == NULL)
4405 continue;
4406 if (loop_nest == NULL)
4407 loop_nest = DDR_LOOP_NEST (ddr);
4408 else
4409 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4410 || DDR_LOOP_NEST (ddr) == loop_nest);
4411 free_dependence_relation (ddr);
4412 }
4413
4414 if (loop_nest)
4415 VEC_free (loop_p, heap, loop_nest);
4416 VEC_free (ddr_p, heap, dependence_relations);
4417 }
4418
4419 /* Free the memory used by the data references from DATAREFS. */
4420
4421 void
4422 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4423 {
4424 unsigned int i;
4425 struct data_reference *dr;
4426
4427 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4428 free_data_ref (dr);
4429 VEC_free (data_reference_p, heap, datarefs);
4430 }
4431
4432 \f
4433
4434 /* Dump vertex I in RDG to FILE. */
4435
4436 void
4437 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4438 {
4439 struct vertex *v = &(rdg->vertices[i]);
4440 struct graph_edge *e;
4441
4442 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4443 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4444 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4445
4446 if (v->pred)
4447 for (e = v->pred; e; e = e->pred_next)
4448 fprintf (file, " %d", e->src);
4449
4450 fprintf (file, ") (out:");
4451
4452 if (v->succ)
4453 for (e = v->succ; e; e = e->succ_next)
4454 fprintf (file, " %d", e->dest);
4455
4456 fprintf (file, ") \n");
4457 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4458 fprintf (file, ")\n");
4459 }
4460
4461 /* Call dump_rdg_vertex on stderr. */
4462
4463 void
4464 debug_rdg_vertex (struct graph *rdg, int i)
4465 {
4466 dump_rdg_vertex (stderr, rdg, i);
4467 }
4468
4469 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4470 dumped vertices to that bitmap. */
4471
4472 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4473 {
4474 int i;
4475
4476 fprintf (file, "(%d\n", c);
4477
4478 for (i = 0; i < rdg->n_vertices; i++)
4479 if (rdg->vertices[i].component == c)
4480 {
4481 if (dumped)
4482 bitmap_set_bit (dumped, i);
4483
4484 dump_rdg_vertex (file, rdg, i);
4485 }
4486
4487 fprintf (file, ")\n");
4488 }
4489
4490 /* Call dump_rdg_vertex on stderr. */
4491
4492 void
4493 debug_rdg_component (struct graph *rdg, int c)
4494 {
4495 dump_rdg_component (stderr, rdg, c, NULL);
4496 }
4497
4498 /* Dump the reduced dependence graph RDG to FILE. */
4499
4500 void
4501 dump_rdg (FILE *file, struct graph *rdg)
4502 {
4503 int i;
4504 bitmap dumped = BITMAP_ALLOC (NULL);
4505
4506 fprintf (file, "(rdg\n");
4507
4508 for (i = 0; i < rdg->n_vertices; i++)
4509 if (!bitmap_bit_p (dumped, i))
4510 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4511
4512 fprintf (file, ")\n");
4513 BITMAP_FREE (dumped);
4514 }
4515
4516 /* Call dump_rdg on stderr. */
4517
4518 void
4519 debug_rdg (struct graph *rdg)
4520 {
4521 dump_rdg (stderr, rdg);
4522 }
4523
4524 static void
4525 dot_rdg_1 (FILE *file, struct graph *rdg)
4526 {
4527 int i;
4528
4529 fprintf (file, "digraph RDG {\n");
4530
4531 for (i = 0; i < rdg->n_vertices; i++)
4532 {
4533 struct vertex *v = &(rdg->vertices[i]);
4534 struct graph_edge *e;
4535
4536 /* Highlight reads from memory. */
4537 if (RDG_MEM_READS_STMT (rdg, i))
4538 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4539
4540 /* Highlight stores to memory. */
4541 if (RDG_MEM_WRITE_STMT (rdg, i))
4542 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4543
4544 if (v->succ)
4545 for (e = v->succ; e; e = e->succ_next)
4546 switch (RDGE_TYPE (e))
4547 {
4548 case input_dd:
4549 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4550 break;
4551
4552 case output_dd:
4553 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4554 break;
4555
4556 case flow_dd:
4557 /* These are the most common dependences: don't print these. */
4558 fprintf (file, "%d -> %d \n", i, e->dest);
4559 break;
4560
4561 case anti_dd:
4562 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4563 break;
4564
4565 default:
4566 gcc_unreachable ();
4567 }
4568 }
4569
4570 fprintf (file, "}\n\n");
4571 }
4572
4573 /* Display SCOP using dotty. */
4574
4575 void
4576 dot_rdg (struct graph *rdg)
4577 {
4578 FILE *file = fopen ("/tmp/rdg.dot", "w");
4579 gcc_assert (file != NULL);
4580
4581 dot_rdg_1 (file, rdg);
4582 fclose (file);
4583
4584 system ("dotty /tmp/rdg.dot");
4585 }
4586
4587
4588 /* This structure is used for recording the mapping statement index in
4589 the RDG. */
4590
4591 struct GTY(()) rdg_vertex_info
4592 {
4593 gimple stmt;
4594 int index;
4595 };
4596
4597 /* Returns the index of STMT in RDG. */
4598
4599 int
4600 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4601 {
4602 struct rdg_vertex_info rvi, *slot;
4603
4604 rvi.stmt = stmt;
4605 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4606
4607 if (!slot)
4608 return -1;
4609
4610 return slot->index;
4611 }
4612
4613 /* Creates an edge in RDG for each distance vector from DDR. The
4614 order that we keep track of in the RDG is the order in which
4615 statements have to be executed. */
4616
4617 static void
4618 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4619 {
4620 struct graph_edge *e;
4621 int va, vb;
4622 data_reference_p dra = DDR_A (ddr);
4623 data_reference_p drb = DDR_B (ddr);
4624 unsigned level = ddr_dependence_level (ddr);
4625
4626 /* For non scalar dependences, when the dependence is REVERSED,
4627 statement B has to be executed before statement A. */
4628 if (level > 0
4629 && !DDR_REVERSED_P (ddr))
4630 {
4631 data_reference_p tmp = dra;
4632 dra = drb;
4633 drb = tmp;
4634 }
4635
4636 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4637 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4638
4639 if (va < 0 || vb < 0)
4640 return;
4641
4642 e = add_edge (rdg, va, vb);
4643 e->data = XNEW (struct rdg_edge);
4644
4645 RDGE_LEVEL (e) = level;
4646 RDGE_RELATION (e) = ddr;
4647
4648 /* Determines the type of the data dependence. */
4649 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4650 RDGE_TYPE (e) = input_dd;
4651 else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4652 RDGE_TYPE (e) = output_dd;
4653 else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4654 RDGE_TYPE (e) = flow_dd;
4655 else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4656 RDGE_TYPE (e) = anti_dd;
4657 }
4658
4659 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4660 the index of DEF in RDG. */
4661
4662 static void
4663 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4664 {
4665 use_operand_p imm_use_p;
4666 imm_use_iterator iterator;
4667
4668 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4669 {
4670 struct graph_edge *e;
4671 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4672
4673 if (use < 0)
4674 continue;
4675
4676 e = add_edge (rdg, idef, use);
4677 e->data = XNEW (struct rdg_edge);
4678 RDGE_TYPE (e) = flow_dd;
4679 RDGE_RELATION (e) = NULL;
4680 }
4681 }
4682
4683 /* Creates the edges of the reduced dependence graph RDG. */
4684
4685 static void
4686 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4687 {
4688 int i;
4689 struct data_dependence_relation *ddr;
4690 def_operand_p def_p;
4691 ssa_op_iter iter;
4692
4693 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4694 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4695 create_rdg_edge_for_ddr (rdg, ddr);
4696
4697 for (i = 0; i < rdg->n_vertices; i++)
4698 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4699 iter, SSA_OP_DEF)
4700 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4701 }
4702
4703 /* Build the vertices of the reduced dependence graph RDG. */
4704
4705 void
4706 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4707 {
4708 int i, j;
4709 gimple stmt;
4710
4711 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
4712 {
4713 VEC (data_ref_loc, heap) *references;
4714 data_ref_loc *ref;
4715 struct vertex *v = &(rdg->vertices[i]);
4716 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4717 struct rdg_vertex_info **slot;
4718
4719 rvi->stmt = stmt;
4720 rvi->index = i;
4721 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4722
4723 if (!*slot)
4724 *slot = rvi;
4725 else
4726 free (rvi);
4727
4728 v->data = XNEW (struct rdg_vertex);
4729 RDG_STMT (rdg, i) = stmt;
4730
4731 RDG_MEM_WRITE_STMT (rdg, i) = false;
4732 RDG_MEM_READS_STMT (rdg, i) = false;
4733 if (gimple_code (stmt) == GIMPLE_PHI)
4734 continue;
4735
4736 get_references_in_stmt (stmt, &references);
4737 for (j = 0; VEC_iterate (data_ref_loc, references, j, ref); j++)
4738 if (!ref->is_read)
4739 RDG_MEM_WRITE_STMT (rdg, i) = true;
4740 else
4741 RDG_MEM_READS_STMT (rdg, i) = true;
4742
4743 VEC_free (data_ref_loc, heap, references);
4744 }
4745 }
4746
4747 /* Initialize STMTS with all the statements of LOOP. When
4748 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4749 which we discover statements is important as
4750 generate_loops_for_partition is using the same traversal for
4751 identifying statements. */
4752
4753 static void
4754 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4755 {
4756 unsigned int i;
4757 basic_block *bbs = get_loop_body_in_dom_order (loop);
4758
4759 for (i = 0; i < loop->num_nodes; i++)
4760 {
4761 basic_block bb = bbs[i];
4762 gimple_stmt_iterator bsi;
4763 gimple stmt;
4764
4765 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4766 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4767
4768 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4769 {
4770 stmt = gsi_stmt (bsi);
4771 if (gimple_code (stmt) != GIMPLE_LABEL)
4772 VEC_safe_push (gimple, heap, *stmts, stmt);
4773 }
4774 }
4775
4776 free (bbs);
4777 }
4778
4779 /* Returns true when all the dependences are computable. */
4780
4781 static bool
4782 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4783 {
4784 ddr_p ddr;
4785 unsigned int i;
4786
4787 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4788 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4789 return false;
4790
4791 return true;
4792 }
4793
4794 /* Computes a hash function for element ELT. */
4795
4796 static hashval_t
4797 hash_stmt_vertex_info (const void *elt)
4798 {
4799 const struct rdg_vertex_info *const rvi =
4800 (const struct rdg_vertex_info *) elt;
4801 gimple stmt = rvi->stmt;
4802
4803 return htab_hash_pointer (stmt);
4804 }
4805
4806 /* Compares database elements E1 and E2. */
4807
4808 static int
4809 eq_stmt_vertex_info (const void *e1, const void *e2)
4810 {
4811 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4812 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4813
4814 return elt1->stmt == elt2->stmt;
4815 }
4816
4817 /* Free the element E. */
4818
4819 static void
4820 hash_stmt_vertex_del (void *e)
4821 {
4822 free (e);
4823 }
4824
4825 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4826 statement of the loop nest, and one edge per data dependence or
4827 scalar dependence. */
4828
4829 struct graph *
4830 build_empty_rdg (int n_stmts)
4831 {
4832 int nb_data_refs = 10;
4833 struct graph *rdg = new_graph (n_stmts);
4834
4835 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4836 eq_stmt_vertex_info, hash_stmt_vertex_del);
4837 return rdg;
4838 }
4839
4840 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4841 statement of the loop nest, and one edge per data dependence or
4842 scalar dependence. */
4843
4844 struct graph *
4845 build_rdg (struct loop *loop)
4846 {
4847 int nb_data_refs = 10;
4848 struct graph *rdg = NULL;
4849 VEC (ddr_p, heap) *dependence_relations;
4850 VEC (data_reference_p, heap) *datarefs;
4851 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, nb_data_refs);
4852
4853 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4854 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4855 compute_data_dependences_for_loop (loop,
4856 false,
4857 &datarefs,
4858 &dependence_relations);
4859
4860 if (!known_dependences_p (dependence_relations))
4861 {
4862 free_dependence_relations (dependence_relations);
4863 free_data_refs (datarefs);
4864 VEC_free (gimple, heap, stmts);
4865
4866 return rdg;
4867 }
4868
4869 stmts_from_loop (loop, &stmts);
4870 rdg = build_empty_rdg (VEC_length (gimple, stmts));
4871
4872 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4873 eq_stmt_vertex_info, hash_stmt_vertex_del);
4874 create_rdg_vertices (rdg, stmts);
4875 create_rdg_edges (rdg, dependence_relations);
4876
4877 VEC_free (gimple, heap, stmts);
4878 return rdg;
4879 }
4880
4881 /* Free the reduced dependence graph RDG. */
4882
4883 void
4884 free_rdg (struct graph *rdg)
4885 {
4886 int i;
4887
4888 for (i = 0; i < rdg->n_vertices; i++)
4889 free (rdg->vertices[i].data);
4890
4891 htab_delete (rdg->indices);
4892 free_graph (rdg);
4893 }
4894
4895 /* Initialize STMTS with all the statements of LOOP that contain a
4896 store to memory. */
4897
4898 void
4899 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4900 {
4901 unsigned int i;
4902 basic_block *bbs = get_loop_body_in_dom_order (loop);
4903
4904 for (i = 0; i < loop->num_nodes; i++)
4905 {
4906 basic_block bb = bbs[i];
4907 gimple_stmt_iterator bsi;
4908
4909 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4910 if (gimple_vdef (gsi_stmt (bsi)))
4911 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4912 }
4913
4914 free (bbs);
4915 }
4916
4917 /* For a data reference REF, return the declaration of its base
4918 address or NULL_TREE if the base is not determined. */
4919
4920 static inline tree
4921 ref_base_address (gimple stmt, data_ref_loc *ref)
4922 {
4923 tree base = NULL_TREE;
4924 tree base_address;
4925 struct data_reference *dr = XCNEW (struct data_reference);
4926
4927 DR_STMT (dr) = stmt;
4928 DR_REF (dr) = *ref->pos;
4929 dr_analyze_innermost (dr);
4930 base_address = DR_BASE_ADDRESS (dr);
4931
4932 if (!base_address)
4933 goto end;
4934
4935 switch (TREE_CODE (base_address))
4936 {
4937 case ADDR_EXPR:
4938 base = TREE_OPERAND (base_address, 0);
4939 break;
4940
4941 default:
4942 base = base_address;
4943 break;
4944 }
4945
4946 end:
4947 free_data_ref (dr);
4948 return base;
4949 }
4950
4951 /* Determines whether the statement from vertex V of the RDG has a
4952 definition used outside the loop that contains this statement. */
4953
4954 bool
4955 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
4956 {
4957 gimple stmt = RDG_STMT (rdg, v);
4958 struct loop *loop = loop_containing_stmt (stmt);
4959 use_operand_p imm_use_p;
4960 imm_use_iterator iterator;
4961 ssa_op_iter it;
4962 def_operand_p def_p;
4963
4964 if (!loop)
4965 return true;
4966
4967 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
4968 {
4969 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
4970 {
4971 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
4972 return true;
4973 }
4974 }
4975
4976 return false;
4977 }
4978
4979 /* Determines whether statements S1 and S2 access to similar memory
4980 locations. Two memory accesses are considered similar when they
4981 have the same base address declaration, i.e. when their
4982 ref_base_address is the same. */
4983
4984 bool
4985 have_similar_memory_accesses (gimple s1, gimple s2)
4986 {
4987 bool res = false;
4988 unsigned i, j;
4989 VEC (data_ref_loc, heap) *refs1, *refs2;
4990 data_ref_loc *ref1, *ref2;
4991
4992 get_references_in_stmt (s1, &refs1);
4993 get_references_in_stmt (s2, &refs2);
4994
4995 for (i = 0; VEC_iterate (data_ref_loc, refs1, i, ref1); i++)
4996 {
4997 tree base1 = ref_base_address (s1, ref1);
4998
4999 if (base1)
5000 for (j = 0; VEC_iterate (data_ref_loc, refs2, j, ref2); j++)
5001 if (base1 == ref_base_address (s2, ref2))
5002 {
5003 res = true;
5004 goto end;
5005 }
5006 }
5007
5008 end:
5009 VEC_free (data_ref_loc, heap, refs1);
5010 VEC_free (data_ref_loc, heap, refs2);
5011 return res;
5012 }
5013
5014 /* Helper function for the hashtab. */
5015
5016 static int
5017 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5018 {
5019 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5020 CONST_CAST_GIMPLE ((const_gimple) s2));
5021 }
5022
5023 /* Helper function for the hashtab. */
5024
5025 static hashval_t
5026 ref_base_address_1 (const void *s)
5027 {
5028 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5029 unsigned i;
5030 VEC (data_ref_loc, heap) *refs;
5031 data_ref_loc *ref;
5032 hashval_t res = 0;
5033
5034 get_references_in_stmt (stmt, &refs);
5035
5036 for (i = 0; VEC_iterate (data_ref_loc, refs, i, ref); i++)
5037 if (!ref->is_read)
5038 {
5039 res = htab_hash_pointer (ref_base_address (stmt, ref));
5040 break;
5041 }
5042
5043 VEC_free (data_ref_loc, heap, refs);
5044 return res;
5045 }
5046
5047 /* Try to remove duplicated write data references from STMTS. */
5048
5049 void
5050 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5051 {
5052 unsigned i;
5053 gimple stmt;
5054 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5055 have_similar_memory_accesses_1, NULL);
5056
5057 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5058 {
5059 void **slot;
5060
5061 slot = htab_find_slot (seen, stmt, INSERT);
5062
5063 if (*slot)
5064 VEC_ordered_remove (gimple, *stmts, i);
5065 else
5066 {
5067 *slot = (void *) stmt;
5068 i++;
5069 }
5070 }
5071
5072 htab_delete (seen);
5073 }
5074
5075 /* Returns the index of PARAMETER in the parameters vector of the
5076 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5077
5078 int
5079 access_matrix_get_index_for_parameter (tree parameter,
5080 struct access_matrix *access_matrix)
5081 {
5082 int i;
5083 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5084 tree lambda_parameter;
5085
5086 for (i = 0; VEC_iterate (tree, lambda_parameters, i, lambda_parameter); i++)
5087 if (lambda_parameter == parameter)
5088 return i + AM_NB_INDUCTION_VARS (access_matrix);
5089
5090 return -1;
5091 }