intrinsic.h (gfc_check_selected_real_kind, [...]): Update prototypes.
[gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
31
32 The goals of this analysis are:
33
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
37
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
40
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
46
47 - to define a knowledge base for storing the data dependence
48 information,
49
50 - to define an interface to access this data.
51
52
53 Definitions:
54
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
64
65 References:
66
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
73
74
75 */
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "tm.h"
81 #include "ggc.h"
82 #include "flags.h"
83 #include "tree.h"
84 #include "basic-block.h"
85 #include "tree-pretty-print.h"
86 #include "gimple-pretty-print.h"
87 #include "tree-flow.h"
88 #include "tree-dump.h"
89 #include "timevar.h"
90 #include "cfgloop.h"
91 #include "tree-data-ref.h"
92 #include "tree-scalar-evolution.h"
93 #include "tree-pass.h"
94 #include "langhooks.h"
95
96 static struct datadep_stats
97 {
98 int num_dependence_tests;
99 int num_dependence_dependent;
100 int num_dependence_independent;
101 int num_dependence_undetermined;
102
103 int num_subscript_tests;
104 int num_subscript_undetermined;
105 int num_same_subscript_function;
106
107 int num_ziv;
108 int num_ziv_independent;
109 int num_ziv_dependent;
110 int num_ziv_unimplemented;
111
112 int num_siv;
113 int num_siv_independent;
114 int num_siv_dependent;
115 int num_siv_unimplemented;
116
117 int num_miv;
118 int num_miv_independent;
119 int num_miv_dependent;
120 int num_miv_unimplemented;
121 } dependence_stats;
122
123 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
124 struct data_reference *,
125 struct data_reference *,
126 struct loop *);
127 /* Returns true iff A divides B. */
128
129 static inline bool
130 tree_fold_divides_p (const_tree a, const_tree b)
131 {
132 gcc_assert (TREE_CODE (a) == INTEGER_CST);
133 gcc_assert (TREE_CODE (b) == INTEGER_CST);
134 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
135 }
136
137 /* Returns true iff A divides B. */
138
139 static inline bool
140 int_divides_p (int a, int b)
141 {
142 return ((b % a) == 0);
143 }
144
145 \f
146
147 /* Dump into FILE all the data references from DATAREFS. */
148
149 void
150 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
151 {
152 unsigned int i;
153 struct data_reference *dr;
154
155 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
156 dump_data_reference (file, dr);
157 }
158
159 /* Dump into STDERR all the data references from DATAREFS. */
160
161 DEBUG_FUNCTION void
162 debug_data_references (VEC (data_reference_p, heap) *datarefs)
163 {
164 dump_data_references (stderr, datarefs);
165 }
166
167 /* Dump to STDERR all the dependence relations from DDRS. */
168
169 DEBUG_FUNCTION void
170 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
171 {
172 dump_data_dependence_relations (stderr, ddrs);
173 }
174
175 /* Dump into FILE all the dependence relations from DDRS. */
176
177 void
178 dump_data_dependence_relations (FILE *file,
179 VEC (ddr_p, heap) *ddrs)
180 {
181 unsigned int i;
182 struct data_dependence_relation *ddr;
183
184 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
185 dump_data_dependence_relation (file, ddr);
186 }
187
188 /* Print to STDERR the data_reference DR. */
189
190 DEBUG_FUNCTION void
191 debug_data_reference (struct data_reference *dr)
192 {
193 dump_data_reference (stderr, dr);
194 }
195
196 /* Dump function for a DATA_REFERENCE structure. */
197
198 void
199 dump_data_reference (FILE *outf,
200 struct data_reference *dr)
201 {
202 unsigned int i;
203
204 fprintf (outf, "#(Data Ref: \n# stmt: ");
205 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
206 fprintf (outf, "# ref: ");
207 print_generic_stmt (outf, DR_REF (dr), 0);
208 fprintf (outf, "# base_object: ");
209 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
210
211 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
212 {
213 fprintf (outf, "# Access function %d: ", i);
214 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
215 }
216 fprintf (outf, "#)\n");
217 }
218
219 /* Dumps the affine function described by FN to the file OUTF. */
220
221 static void
222 dump_affine_function (FILE *outf, affine_fn fn)
223 {
224 unsigned i;
225 tree coef;
226
227 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
228 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
229 {
230 fprintf (outf, " + ");
231 print_generic_expr (outf, coef, TDF_SLIM);
232 fprintf (outf, " * x_%u", i);
233 }
234 }
235
236 /* Dumps the conflict function CF to the file OUTF. */
237
238 static void
239 dump_conflict_function (FILE *outf, conflict_function *cf)
240 {
241 unsigned i;
242
243 if (cf->n == NO_DEPENDENCE)
244 fprintf (outf, "no dependence\n");
245 else if (cf->n == NOT_KNOWN)
246 fprintf (outf, "not known\n");
247 else
248 {
249 for (i = 0; i < cf->n; i++)
250 {
251 fprintf (outf, "[");
252 dump_affine_function (outf, cf->fns[i]);
253 fprintf (outf, "]\n");
254 }
255 }
256 }
257
258 /* Dump function for a SUBSCRIPT structure. */
259
260 void
261 dump_subscript (FILE *outf, struct subscript *subscript)
262 {
263 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
264
265 fprintf (outf, "\n (subscript \n");
266 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
267 dump_conflict_function (outf, cf);
268 if (CF_NONTRIVIAL_P (cf))
269 {
270 tree last_iteration = SUB_LAST_CONFLICT (subscript);
271 fprintf (outf, " last_conflict: ");
272 print_generic_stmt (outf, last_iteration, 0);
273 }
274
275 cf = SUB_CONFLICTS_IN_B (subscript);
276 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
277 dump_conflict_function (outf, cf);
278 if (CF_NONTRIVIAL_P (cf))
279 {
280 tree last_iteration = SUB_LAST_CONFLICT (subscript);
281 fprintf (outf, " last_conflict: ");
282 print_generic_stmt (outf, last_iteration, 0);
283 }
284
285 fprintf (outf, " (Subscript distance: ");
286 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
287 fprintf (outf, " )\n");
288 fprintf (outf, " )\n");
289 }
290
291 /* Print the classic direction vector DIRV to OUTF. */
292
293 void
294 print_direction_vector (FILE *outf,
295 lambda_vector dirv,
296 int length)
297 {
298 int eq;
299
300 for (eq = 0; eq < length; eq++)
301 {
302 enum data_dependence_direction dir = ((enum data_dependence_direction)
303 dirv[eq]);
304
305 switch (dir)
306 {
307 case dir_positive:
308 fprintf (outf, " +");
309 break;
310 case dir_negative:
311 fprintf (outf, " -");
312 break;
313 case dir_equal:
314 fprintf (outf, " =");
315 break;
316 case dir_positive_or_equal:
317 fprintf (outf, " +=");
318 break;
319 case dir_positive_or_negative:
320 fprintf (outf, " +-");
321 break;
322 case dir_negative_or_equal:
323 fprintf (outf, " -=");
324 break;
325 case dir_star:
326 fprintf (outf, " *");
327 break;
328 default:
329 fprintf (outf, "indep");
330 break;
331 }
332 }
333 fprintf (outf, "\n");
334 }
335
336 /* Print a vector of direction vectors. */
337
338 void
339 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
340 int length)
341 {
342 unsigned j;
343 lambda_vector v;
344
345 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
346 print_direction_vector (outf, v, length);
347 }
348
349 /* Print a vector of distance vectors. */
350
351 void
352 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
353 int length)
354 {
355 unsigned j;
356 lambda_vector v;
357
358 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
359 print_lambda_vector (outf, v, length);
360 }
361
362 /* Debug version. */
363
364 DEBUG_FUNCTION void
365 debug_data_dependence_relation (struct data_dependence_relation *ddr)
366 {
367 dump_data_dependence_relation (stderr, ddr);
368 }
369
370 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
371
372 void
373 dump_data_dependence_relation (FILE *outf,
374 struct data_dependence_relation *ddr)
375 {
376 struct data_reference *dra, *drb;
377
378 fprintf (outf, "(Data Dep: \n");
379
380 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
381 {
382 if (ddr)
383 {
384 dra = DDR_A (ddr);
385 drb = DDR_B (ddr);
386 if (dra)
387 dump_data_reference (outf, dra);
388 else
389 fprintf (outf, " (nil)\n");
390 if (drb)
391 dump_data_reference (outf, drb);
392 else
393 fprintf (outf, " (nil)\n");
394 }
395 fprintf (outf, " (don't know)\n)\n");
396 return;
397 }
398
399 dra = DDR_A (ddr);
400 drb = DDR_B (ddr);
401 dump_data_reference (outf, dra);
402 dump_data_reference (outf, drb);
403
404 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
405 fprintf (outf, " (no dependence)\n");
406
407 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
408 {
409 unsigned int i;
410 struct loop *loopi;
411
412 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
413 {
414 fprintf (outf, " access_fn_A: ");
415 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
416 fprintf (outf, " access_fn_B: ");
417 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
418 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
419 }
420
421 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
422 fprintf (outf, " loop nest: (");
423 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
424 fprintf (outf, "%d ", loopi->num);
425 fprintf (outf, ")\n");
426
427 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
428 {
429 fprintf (outf, " distance_vector: ");
430 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
431 DDR_NB_LOOPS (ddr));
432 }
433
434 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
435 {
436 fprintf (outf, " direction_vector: ");
437 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
438 DDR_NB_LOOPS (ddr));
439 }
440 }
441
442 fprintf (outf, ")\n");
443 }
444
445 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
446
447 void
448 dump_data_dependence_direction (FILE *file,
449 enum data_dependence_direction dir)
450 {
451 switch (dir)
452 {
453 case dir_positive:
454 fprintf (file, "+");
455 break;
456
457 case dir_negative:
458 fprintf (file, "-");
459 break;
460
461 case dir_equal:
462 fprintf (file, "=");
463 break;
464
465 case dir_positive_or_negative:
466 fprintf (file, "+-");
467 break;
468
469 case dir_positive_or_equal:
470 fprintf (file, "+=");
471 break;
472
473 case dir_negative_or_equal:
474 fprintf (file, "-=");
475 break;
476
477 case dir_star:
478 fprintf (file, "*");
479 break;
480
481 default:
482 break;
483 }
484 }
485
486 /* Dumps the distance and direction vectors in FILE. DDRS contains
487 the dependence relations, and VECT_SIZE is the size of the
488 dependence vectors, or in other words the number of loops in the
489 considered nest. */
490
491 void
492 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
493 {
494 unsigned int i, j;
495 struct data_dependence_relation *ddr;
496 lambda_vector v;
497
498 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
499 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
500 {
501 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
502 {
503 fprintf (file, "DISTANCE_V (");
504 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
505 fprintf (file, ")\n");
506 }
507
508 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
509 {
510 fprintf (file, "DIRECTION_V (");
511 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
512 fprintf (file, ")\n");
513 }
514 }
515
516 fprintf (file, "\n\n");
517 }
518
519 /* Dumps the data dependence relations DDRS in FILE. */
520
521 void
522 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
523 {
524 unsigned int i;
525 struct data_dependence_relation *ddr;
526
527 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
528 dump_data_dependence_relation (file, ddr);
529
530 fprintf (file, "\n\n");
531 }
532
533 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
534 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
535 constant of type ssizetype, and returns true. If we cannot do this
536 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
537 is returned. */
538
539 static bool
540 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
541 tree *var, tree *off)
542 {
543 tree var0, var1;
544 tree off0, off1;
545 enum tree_code ocode = code;
546
547 *var = NULL_TREE;
548 *off = NULL_TREE;
549
550 switch (code)
551 {
552 case INTEGER_CST:
553 *var = build_int_cst (type, 0);
554 *off = fold_convert (ssizetype, op0);
555 return true;
556
557 case POINTER_PLUS_EXPR:
558 ocode = PLUS_EXPR;
559 /* FALLTHROUGH */
560 case PLUS_EXPR:
561 case MINUS_EXPR:
562 split_constant_offset (op0, &var0, &off0);
563 split_constant_offset (op1, &var1, &off1);
564 *var = fold_build2 (code, type, var0, var1);
565 *off = size_binop (ocode, off0, off1);
566 return true;
567
568 case MULT_EXPR:
569 if (TREE_CODE (op1) != INTEGER_CST)
570 return false;
571
572 split_constant_offset (op0, &var0, &off0);
573 *var = fold_build2 (MULT_EXPR, type, var0, op1);
574 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
575 return true;
576
577 case ADDR_EXPR:
578 {
579 tree base, poffset;
580 HOST_WIDE_INT pbitsize, pbitpos;
581 enum machine_mode pmode;
582 int punsignedp, pvolatilep;
583
584 op0 = TREE_OPERAND (op0, 0);
585 if (!handled_component_p (op0))
586 return false;
587
588 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
589 &pmode, &punsignedp, &pvolatilep, false);
590
591 if (pbitpos % BITS_PER_UNIT != 0)
592 return false;
593 base = build_fold_addr_expr (base);
594 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
595
596 if (poffset)
597 {
598 split_constant_offset (poffset, &poffset, &off1);
599 off0 = size_binop (PLUS_EXPR, off0, off1);
600 if (POINTER_TYPE_P (TREE_TYPE (base)))
601 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
602 base, fold_convert (sizetype, poffset));
603 else
604 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
605 fold_convert (TREE_TYPE (base), poffset));
606 }
607
608 var0 = fold_convert (type, base);
609
610 /* If variable length types are involved, punt, otherwise casts
611 might be converted into ARRAY_REFs in gimplify_conversion.
612 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
613 possibly no longer appears in current GIMPLE, might resurface.
614 This perhaps could run
615 if (CONVERT_EXPR_P (var0))
616 {
617 gimplify_conversion (&var0);
618 // Attempt to fill in any within var0 found ARRAY_REF's
619 // element size from corresponding op embedded ARRAY_REF,
620 // if unsuccessful, just punt.
621 } */
622 while (POINTER_TYPE_P (type))
623 type = TREE_TYPE (type);
624 if (int_size_in_bytes (type) < 0)
625 return false;
626
627 *var = var0;
628 *off = off0;
629 return true;
630 }
631
632 case SSA_NAME:
633 {
634 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
635 enum tree_code subcode;
636
637 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
638 return false;
639
640 var0 = gimple_assign_rhs1 (def_stmt);
641 subcode = gimple_assign_rhs_code (def_stmt);
642 var1 = gimple_assign_rhs2 (def_stmt);
643
644 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
645 }
646 CASE_CONVERT:
647 {
648 /* We must not introduce undefined overflow, and we must not change the value.
649 Hence we're okay if the inner type doesn't overflow to start with
650 (pointer or signed), the outer type also is an integer or pointer
651 and the outer precision is at least as large as the inner. */
652 tree itype = TREE_TYPE (op0);
653 if ((POINTER_TYPE_P (itype)
654 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
655 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
656 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
657 {
658 split_constant_offset (op0, &var0, off);
659 *var = fold_convert (type, var0);
660 return true;
661 }
662 return false;
663 }
664
665 default:
666 return false;
667 }
668 }
669
670 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
671 will be ssizetype. */
672
673 void
674 split_constant_offset (tree exp, tree *var, tree *off)
675 {
676 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
677 enum tree_code code;
678
679 *var = exp;
680 *off = ssize_int (0);
681 STRIP_NOPS (exp);
682
683 if (automatically_generated_chrec_p (exp))
684 return;
685
686 otype = TREE_TYPE (exp);
687 code = TREE_CODE (exp);
688 extract_ops_from_tree (exp, &code, &op0, &op1);
689 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
690 {
691 *var = fold_convert (type, e);
692 *off = o;
693 }
694 }
695
696 /* Returns the address ADDR of an object in a canonical shape (without nop
697 casts, and with type of pointer to the object). */
698
699 static tree
700 canonicalize_base_object_address (tree addr)
701 {
702 tree orig = addr;
703
704 STRIP_NOPS (addr);
705
706 /* The base address may be obtained by casting from integer, in that case
707 keep the cast. */
708 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
709 return orig;
710
711 if (TREE_CODE (addr) != ADDR_EXPR)
712 return addr;
713
714 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
715 }
716
717 /* Analyzes the behavior of the memory reference DR in the innermost loop or
718 basic block that contains it. Returns true if analysis succeed or false
719 otherwise. */
720
721 bool
722 dr_analyze_innermost (struct data_reference *dr)
723 {
724 gimple stmt = DR_STMT (dr);
725 struct loop *loop = loop_containing_stmt (stmt);
726 tree ref = DR_REF (dr);
727 HOST_WIDE_INT pbitsize, pbitpos;
728 tree base, poffset;
729 enum machine_mode pmode;
730 int punsignedp, pvolatilep;
731 affine_iv base_iv, offset_iv;
732 tree init, dinit, step;
733 bool in_loop = (loop && loop->num);
734
735 if (dump_file && (dump_flags & TDF_DETAILS))
736 fprintf (dump_file, "analyze_innermost: ");
737
738 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
739 &pmode, &punsignedp, &pvolatilep, false);
740 gcc_assert (base != NULL_TREE);
741
742 if (pbitpos % BITS_PER_UNIT != 0)
743 {
744 if (dump_file && (dump_flags & TDF_DETAILS))
745 fprintf (dump_file, "failed: bit offset alignment.\n");
746 return false;
747 }
748
749 base = build_fold_addr_expr (base);
750 if (in_loop)
751 {
752 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
753 false))
754 {
755 if (dump_file && (dump_flags & TDF_DETAILS))
756 fprintf (dump_file, "failed: evolution of base is not affine.\n");
757 return false;
758 }
759 }
760 else
761 {
762 base_iv.base = base;
763 base_iv.step = ssize_int (0);
764 base_iv.no_overflow = true;
765 }
766
767 if (!poffset)
768 {
769 offset_iv.base = ssize_int (0);
770 offset_iv.step = ssize_int (0);
771 }
772 else
773 {
774 if (!in_loop)
775 {
776 offset_iv.base = poffset;
777 offset_iv.step = ssize_int (0);
778 }
779 else if (!simple_iv (loop, loop_containing_stmt (stmt),
780 poffset, &offset_iv, false))
781 {
782 if (dump_file && (dump_flags & TDF_DETAILS))
783 fprintf (dump_file, "failed: evolution of offset is not"
784 " affine.\n");
785 return false;
786 }
787 }
788
789 init = ssize_int (pbitpos / BITS_PER_UNIT);
790 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
791 init = size_binop (PLUS_EXPR, init, dinit);
792 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
793 init = size_binop (PLUS_EXPR, init, dinit);
794
795 step = size_binop (PLUS_EXPR,
796 fold_convert (ssizetype, base_iv.step),
797 fold_convert (ssizetype, offset_iv.step));
798
799 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
800
801 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
802 DR_INIT (dr) = init;
803 DR_STEP (dr) = step;
804
805 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
806
807 if (dump_file && (dump_flags & TDF_DETAILS))
808 fprintf (dump_file, "success.\n");
809
810 return true;
811 }
812
813 /* Determines the base object and the list of indices of memory reference
814 DR, analyzed in loop nest NEST. */
815
816 static void
817 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
818 {
819 gimple stmt = DR_STMT (dr);
820 struct loop *loop = loop_containing_stmt (stmt);
821 VEC (tree, heap) *access_fns = NULL;
822 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
823 tree base, off, access_fn = NULL_TREE;
824 basic_block before_loop = NULL;
825
826 if (nest)
827 before_loop = block_before_loop (nest);
828
829 while (handled_component_p (aref))
830 {
831 if (TREE_CODE (aref) == ARRAY_REF)
832 {
833 op = TREE_OPERAND (aref, 1);
834 if (nest)
835 {
836 access_fn = analyze_scalar_evolution (loop, op);
837 access_fn = instantiate_scev (before_loop, loop, access_fn);
838 VEC_safe_push (tree, heap, access_fns, access_fn);
839 }
840
841 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
842 }
843
844 aref = TREE_OPERAND (aref, 0);
845 }
846
847 if (nest && INDIRECT_REF_P (aref))
848 {
849 op = TREE_OPERAND (aref, 0);
850 access_fn = analyze_scalar_evolution (loop, op);
851 access_fn = instantiate_scev (before_loop, loop, access_fn);
852 base = initial_condition (access_fn);
853 split_constant_offset (base, &base, &off);
854 access_fn = chrec_replace_initial_condition (access_fn,
855 fold_convert (TREE_TYPE (base), off));
856
857 TREE_OPERAND (aref, 0) = base;
858 VEC_safe_push (tree, heap, access_fns, access_fn);
859 }
860
861 DR_BASE_OBJECT (dr) = ref;
862 DR_ACCESS_FNS (dr) = access_fns;
863 }
864
865 /* Extracts the alias analysis information from the memory reference DR. */
866
867 static void
868 dr_analyze_alias (struct data_reference *dr)
869 {
870 tree ref = DR_REF (dr);
871 tree base = get_base_address (ref), addr;
872
873 if (INDIRECT_REF_P (base))
874 {
875 addr = TREE_OPERAND (base, 0);
876 if (TREE_CODE (addr) == SSA_NAME)
877 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
878 }
879 }
880
881 /* Returns true if the address of DR is invariant. */
882
883 static bool
884 dr_address_invariant_p (struct data_reference *dr)
885 {
886 unsigned i;
887 tree idx;
888
889 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
890 if (tree_contains_chrecs (idx, NULL))
891 return false;
892
893 return true;
894 }
895
896 /* Frees data reference DR. */
897
898 void
899 free_data_ref (data_reference_p dr)
900 {
901 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
902 free (dr);
903 }
904
905 /* Analyzes memory reference MEMREF accessed in STMT. The reference
906 is read if IS_READ is true, write otherwise. Returns the
907 data_reference description of MEMREF. NEST is the outermost loop of the
908 loop nest in that the reference should be analyzed. */
909
910 struct data_reference *
911 create_data_ref (struct loop *nest, tree memref, gimple stmt, bool is_read)
912 {
913 struct data_reference *dr;
914
915 if (dump_file && (dump_flags & TDF_DETAILS))
916 {
917 fprintf (dump_file, "Creating dr for ");
918 print_generic_expr (dump_file, memref, TDF_SLIM);
919 fprintf (dump_file, "\n");
920 }
921
922 dr = XCNEW (struct data_reference);
923 DR_STMT (dr) = stmt;
924 DR_REF (dr) = memref;
925 DR_IS_READ (dr) = is_read;
926
927 dr_analyze_innermost (dr);
928 dr_analyze_indices (dr, nest);
929 dr_analyze_alias (dr);
930
931 if (dump_file && (dump_flags & TDF_DETAILS))
932 {
933 fprintf (dump_file, "\tbase_address: ");
934 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
935 fprintf (dump_file, "\n\toffset from base address: ");
936 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
937 fprintf (dump_file, "\n\tconstant offset from base address: ");
938 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
939 fprintf (dump_file, "\n\tstep: ");
940 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
941 fprintf (dump_file, "\n\taligned to: ");
942 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
943 fprintf (dump_file, "\n\tbase_object: ");
944 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
945 fprintf (dump_file, "\n");
946 }
947
948 return dr;
949 }
950
951 /* Returns true if FNA == FNB. */
952
953 static bool
954 affine_function_equal_p (affine_fn fna, affine_fn fnb)
955 {
956 unsigned i, n = VEC_length (tree, fna);
957
958 if (n != VEC_length (tree, fnb))
959 return false;
960
961 for (i = 0; i < n; i++)
962 if (!operand_equal_p (VEC_index (tree, fna, i),
963 VEC_index (tree, fnb, i), 0))
964 return false;
965
966 return true;
967 }
968
969 /* If all the functions in CF are the same, returns one of them,
970 otherwise returns NULL. */
971
972 static affine_fn
973 common_affine_function (conflict_function *cf)
974 {
975 unsigned i;
976 affine_fn comm;
977
978 if (!CF_NONTRIVIAL_P (cf))
979 return NULL;
980
981 comm = cf->fns[0];
982
983 for (i = 1; i < cf->n; i++)
984 if (!affine_function_equal_p (comm, cf->fns[i]))
985 return NULL;
986
987 return comm;
988 }
989
990 /* Returns the base of the affine function FN. */
991
992 static tree
993 affine_function_base (affine_fn fn)
994 {
995 return VEC_index (tree, fn, 0);
996 }
997
998 /* Returns true if FN is a constant. */
999
1000 static bool
1001 affine_function_constant_p (affine_fn fn)
1002 {
1003 unsigned i;
1004 tree coef;
1005
1006 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1007 if (!integer_zerop (coef))
1008 return false;
1009
1010 return true;
1011 }
1012
1013 /* Returns true if FN is the zero constant function. */
1014
1015 static bool
1016 affine_function_zero_p (affine_fn fn)
1017 {
1018 return (integer_zerop (affine_function_base (fn))
1019 && affine_function_constant_p (fn));
1020 }
1021
1022 /* Returns a signed integer type with the largest precision from TA
1023 and TB. */
1024
1025 static tree
1026 signed_type_for_types (tree ta, tree tb)
1027 {
1028 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1029 return signed_type_for (ta);
1030 else
1031 return signed_type_for (tb);
1032 }
1033
1034 /* Applies operation OP on affine functions FNA and FNB, and returns the
1035 result. */
1036
1037 static affine_fn
1038 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1039 {
1040 unsigned i, n, m;
1041 affine_fn ret;
1042 tree coef;
1043
1044 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1045 {
1046 n = VEC_length (tree, fna);
1047 m = VEC_length (tree, fnb);
1048 }
1049 else
1050 {
1051 n = VEC_length (tree, fnb);
1052 m = VEC_length (tree, fna);
1053 }
1054
1055 ret = VEC_alloc (tree, heap, m);
1056 for (i = 0; i < n; i++)
1057 {
1058 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1059 TREE_TYPE (VEC_index (tree, fnb, i)));
1060
1061 VEC_quick_push (tree, ret,
1062 fold_build2 (op, type,
1063 VEC_index (tree, fna, i),
1064 VEC_index (tree, fnb, i)));
1065 }
1066
1067 for (; VEC_iterate (tree, fna, i, coef); i++)
1068 VEC_quick_push (tree, ret,
1069 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1070 coef, integer_zero_node));
1071 for (; VEC_iterate (tree, fnb, i, coef); i++)
1072 VEC_quick_push (tree, ret,
1073 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1074 integer_zero_node, coef));
1075
1076 return ret;
1077 }
1078
1079 /* Returns the sum of affine functions FNA and FNB. */
1080
1081 static affine_fn
1082 affine_fn_plus (affine_fn fna, affine_fn fnb)
1083 {
1084 return affine_fn_op (PLUS_EXPR, fna, fnb);
1085 }
1086
1087 /* Returns the difference of affine functions FNA and FNB. */
1088
1089 static affine_fn
1090 affine_fn_minus (affine_fn fna, affine_fn fnb)
1091 {
1092 return affine_fn_op (MINUS_EXPR, fna, fnb);
1093 }
1094
1095 /* Frees affine function FN. */
1096
1097 static void
1098 affine_fn_free (affine_fn fn)
1099 {
1100 VEC_free (tree, heap, fn);
1101 }
1102
1103 /* Determine for each subscript in the data dependence relation DDR
1104 the distance. */
1105
1106 static void
1107 compute_subscript_distance (struct data_dependence_relation *ddr)
1108 {
1109 conflict_function *cf_a, *cf_b;
1110 affine_fn fn_a, fn_b, diff;
1111
1112 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1113 {
1114 unsigned int i;
1115
1116 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1117 {
1118 struct subscript *subscript;
1119
1120 subscript = DDR_SUBSCRIPT (ddr, i);
1121 cf_a = SUB_CONFLICTS_IN_A (subscript);
1122 cf_b = SUB_CONFLICTS_IN_B (subscript);
1123
1124 fn_a = common_affine_function (cf_a);
1125 fn_b = common_affine_function (cf_b);
1126 if (!fn_a || !fn_b)
1127 {
1128 SUB_DISTANCE (subscript) = chrec_dont_know;
1129 return;
1130 }
1131 diff = affine_fn_minus (fn_a, fn_b);
1132
1133 if (affine_function_constant_p (diff))
1134 SUB_DISTANCE (subscript) = affine_function_base (diff);
1135 else
1136 SUB_DISTANCE (subscript) = chrec_dont_know;
1137
1138 affine_fn_free (diff);
1139 }
1140 }
1141 }
1142
1143 /* Returns the conflict function for "unknown". */
1144
1145 static conflict_function *
1146 conflict_fn_not_known (void)
1147 {
1148 conflict_function *fn = XCNEW (conflict_function);
1149 fn->n = NOT_KNOWN;
1150
1151 return fn;
1152 }
1153
1154 /* Returns the conflict function for "independent". */
1155
1156 static conflict_function *
1157 conflict_fn_no_dependence (void)
1158 {
1159 conflict_function *fn = XCNEW (conflict_function);
1160 fn->n = NO_DEPENDENCE;
1161
1162 return fn;
1163 }
1164
1165 /* Returns true if the address of OBJ is invariant in LOOP. */
1166
1167 static bool
1168 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1169 {
1170 while (handled_component_p (obj))
1171 {
1172 if (TREE_CODE (obj) == ARRAY_REF)
1173 {
1174 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1175 need to check the stride and the lower bound of the reference. */
1176 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1177 loop->num)
1178 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1179 loop->num))
1180 return false;
1181 }
1182 else if (TREE_CODE (obj) == COMPONENT_REF)
1183 {
1184 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1185 loop->num))
1186 return false;
1187 }
1188 obj = TREE_OPERAND (obj, 0);
1189 }
1190
1191 if (!INDIRECT_REF_P (obj))
1192 return true;
1193
1194 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1195 loop->num);
1196 }
1197
1198 /* Returns true if A and B are accesses to different objects, or to different
1199 fields of the same object. */
1200
1201 static bool
1202 disjoint_objects_p (tree a, tree b)
1203 {
1204 tree base_a, base_b;
1205 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1206 bool ret;
1207
1208 base_a = get_base_address (a);
1209 base_b = get_base_address (b);
1210
1211 if (DECL_P (base_a)
1212 && DECL_P (base_b)
1213 && base_a != base_b)
1214 return true;
1215
1216 if (!operand_equal_p (base_a, base_b, 0))
1217 return false;
1218
1219 /* Compare the component references of A and B. We must start from the inner
1220 ones, so record them to the vector first. */
1221 while (handled_component_p (a))
1222 {
1223 VEC_safe_push (tree, heap, comp_a, a);
1224 a = TREE_OPERAND (a, 0);
1225 }
1226 while (handled_component_p (b))
1227 {
1228 VEC_safe_push (tree, heap, comp_b, b);
1229 b = TREE_OPERAND (b, 0);
1230 }
1231
1232 ret = false;
1233 while (1)
1234 {
1235 if (VEC_length (tree, comp_a) == 0
1236 || VEC_length (tree, comp_b) == 0)
1237 break;
1238
1239 a = VEC_pop (tree, comp_a);
1240 b = VEC_pop (tree, comp_b);
1241
1242 /* Real and imaginary part of a variable do not alias. */
1243 if ((TREE_CODE (a) == REALPART_EXPR
1244 && TREE_CODE (b) == IMAGPART_EXPR)
1245 || (TREE_CODE (a) == IMAGPART_EXPR
1246 && TREE_CODE (b) == REALPART_EXPR))
1247 {
1248 ret = true;
1249 break;
1250 }
1251
1252 if (TREE_CODE (a) != TREE_CODE (b))
1253 break;
1254
1255 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1256 DR_BASE_OBJECT are always zero. */
1257 if (TREE_CODE (a) == ARRAY_REF)
1258 continue;
1259 else if (TREE_CODE (a) == COMPONENT_REF)
1260 {
1261 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1262 continue;
1263
1264 /* Different fields of unions may overlap. */
1265 base_a = TREE_OPERAND (a, 0);
1266 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1267 break;
1268
1269 /* Different fields of structures cannot. */
1270 ret = true;
1271 break;
1272 }
1273 else
1274 break;
1275 }
1276
1277 VEC_free (tree, heap, comp_a);
1278 VEC_free (tree, heap, comp_b);
1279
1280 return ret;
1281 }
1282
1283 /* Returns false if we can prove that data references A and B do not alias,
1284 true otherwise. */
1285
1286 bool
1287 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1288 {
1289 const_tree addr_a = DR_BASE_ADDRESS (a);
1290 const_tree addr_b = DR_BASE_ADDRESS (b);
1291 const_tree type_a, type_b;
1292 const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1293
1294 /* If the accessed objects are disjoint, the memory references do not
1295 alias. */
1296 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1297 return false;
1298
1299 /* Query the alias oracle. */
1300 if (!DR_IS_READ (a) && !DR_IS_READ (b))
1301 {
1302 if (!refs_output_dependent_p (DR_REF (a), DR_REF (b)))
1303 return false;
1304 }
1305 else if (DR_IS_READ (a) && !DR_IS_READ (b))
1306 {
1307 if (!refs_anti_dependent_p (DR_REF (a), DR_REF (b)))
1308 return false;
1309 }
1310 else if (!refs_may_alias_p (DR_REF (a), DR_REF (b)))
1311 return false;
1312
1313 if (!addr_a || !addr_b)
1314 return true;
1315
1316 /* If the references are based on different static objects, they cannot
1317 alias (PTA should be able to disambiguate such accesses, but often
1318 it fails to). */
1319 if (TREE_CODE (addr_a) == ADDR_EXPR
1320 && TREE_CODE (addr_b) == ADDR_EXPR)
1321 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1322
1323 /* An instruction writing through a restricted pointer is "independent" of any
1324 instruction reading or writing through a different restricted pointer,
1325 in the same block/scope. */
1326
1327 type_a = TREE_TYPE (addr_a);
1328 type_b = TREE_TYPE (addr_b);
1329 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1330
1331 if (TREE_CODE (addr_a) == SSA_NAME)
1332 decl_a = SSA_NAME_VAR (addr_a);
1333 if (TREE_CODE (addr_b) == SSA_NAME)
1334 decl_b = SSA_NAME_VAR (addr_b);
1335
1336 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1337 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1338 && decl_a && DECL_P (decl_a)
1339 && decl_b && DECL_P (decl_b)
1340 && decl_a != decl_b
1341 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1342 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1343 return false;
1344
1345 return true;
1346 }
1347
1348 static void compute_self_dependence (struct data_dependence_relation *);
1349
1350 /* Initialize a data dependence relation between data accesses A and
1351 B. NB_LOOPS is the number of loops surrounding the references: the
1352 size of the classic distance/direction vectors. */
1353
1354 static struct data_dependence_relation *
1355 initialize_data_dependence_relation (struct data_reference *a,
1356 struct data_reference *b,
1357 VEC (loop_p, heap) *loop_nest)
1358 {
1359 struct data_dependence_relation *res;
1360 unsigned int i;
1361
1362 res = XNEW (struct data_dependence_relation);
1363 DDR_A (res) = a;
1364 DDR_B (res) = b;
1365 DDR_LOOP_NEST (res) = NULL;
1366 DDR_REVERSED_P (res) = false;
1367 DDR_SUBSCRIPTS (res) = NULL;
1368 DDR_DIR_VECTS (res) = NULL;
1369 DDR_DIST_VECTS (res) = NULL;
1370
1371 if (a == NULL || b == NULL)
1372 {
1373 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1374 return res;
1375 }
1376
1377 /* If the data references do not alias, then they are independent. */
1378 if (!dr_may_alias_p (a, b))
1379 {
1380 DDR_ARE_DEPENDENT (res) = chrec_known;
1381 return res;
1382 }
1383
1384 /* When the references are exactly the same, don't spend time doing
1385 the data dependence tests, just initialize the ddr and return. */
1386 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1387 {
1388 DDR_AFFINE_P (res) = true;
1389 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1390 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1391 DDR_LOOP_NEST (res) = loop_nest;
1392 DDR_INNER_LOOP (res) = 0;
1393 DDR_SELF_REFERENCE (res) = true;
1394 compute_self_dependence (res);
1395 return res;
1396 }
1397
1398 /* If the references do not access the same object, we do not know
1399 whether they alias or not. */
1400 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1401 {
1402 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1403 return res;
1404 }
1405
1406 /* If the base of the object is not invariant in the loop nest, we cannot
1407 analyze it. TODO -- in fact, it would suffice to record that there may
1408 be arbitrary dependences in the loops where the base object varies. */
1409 if (loop_nest
1410 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1411 DR_BASE_OBJECT (a)))
1412 {
1413 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1414 return res;
1415 }
1416
1417 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1418
1419 DDR_AFFINE_P (res) = true;
1420 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1421 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1422 DDR_LOOP_NEST (res) = loop_nest;
1423 DDR_INNER_LOOP (res) = 0;
1424 DDR_SELF_REFERENCE (res) = false;
1425
1426 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1427 {
1428 struct subscript *subscript;
1429
1430 subscript = XNEW (struct subscript);
1431 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1432 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1433 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1434 SUB_DISTANCE (subscript) = chrec_dont_know;
1435 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1436 }
1437
1438 return res;
1439 }
1440
1441 /* Frees memory used by the conflict function F. */
1442
1443 static void
1444 free_conflict_function (conflict_function *f)
1445 {
1446 unsigned i;
1447
1448 if (CF_NONTRIVIAL_P (f))
1449 {
1450 for (i = 0; i < f->n; i++)
1451 affine_fn_free (f->fns[i]);
1452 }
1453 free (f);
1454 }
1455
1456 /* Frees memory used by SUBSCRIPTS. */
1457
1458 static void
1459 free_subscripts (VEC (subscript_p, heap) *subscripts)
1460 {
1461 unsigned i;
1462 subscript_p s;
1463
1464 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1465 {
1466 free_conflict_function (s->conflicting_iterations_in_a);
1467 free_conflict_function (s->conflicting_iterations_in_b);
1468 free (s);
1469 }
1470 VEC_free (subscript_p, heap, subscripts);
1471 }
1472
1473 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1474 description. */
1475
1476 static inline void
1477 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1478 tree chrec)
1479 {
1480 if (dump_file && (dump_flags & TDF_DETAILS))
1481 {
1482 fprintf (dump_file, "(dependence classified: ");
1483 print_generic_expr (dump_file, chrec, 0);
1484 fprintf (dump_file, ")\n");
1485 }
1486
1487 DDR_ARE_DEPENDENT (ddr) = chrec;
1488 free_subscripts (DDR_SUBSCRIPTS (ddr));
1489 DDR_SUBSCRIPTS (ddr) = NULL;
1490 }
1491
1492 /* The dependence relation DDR cannot be represented by a distance
1493 vector. */
1494
1495 static inline void
1496 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1497 {
1498 if (dump_file && (dump_flags & TDF_DETAILS))
1499 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1500
1501 DDR_AFFINE_P (ddr) = false;
1502 }
1503
1504 \f
1505
1506 /* This section contains the classic Banerjee tests. */
1507
1508 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1509 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1510
1511 static inline bool
1512 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1513 {
1514 return (evolution_function_is_constant_p (chrec_a)
1515 && evolution_function_is_constant_p (chrec_b));
1516 }
1517
1518 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1519 variable, i.e., if the SIV (Single Index Variable) test is true. */
1520
1521 static bool
1522 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1523 {
1524 if ((evolution_function_is_constant_p (chrec_a)
1525 && evolution_function_is_univariate_p (chrec_b))
1526 || (evolution_function_is_constant_p (chrec_b)
1527 && evolution_function_is_univariate_p (chrec_a)))
1528 return true;
1529
1530 if (evolution_function_is_univariate_p (chrec_a)
1531 && evolution_function_is_univariate_p (chrec_b))
1532 {
1533 switch (TREE_CODE (chrec_a))
1534 {
1535 case POLYNOMIAL_CHREC:
1536 switch (TREE_CODE (chrec_b))
1537 {
1538 case POLYNOMIAL_CHREC:
1539 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1540 return false;
1541
1542 default:
1543 return true;
1544 }
1545
1546 default:
1547 return true;
1548 }
1549 }
1550
1551 return false;
1552 }
1553
1554 /* Creates a conflict function with N dimensions. The affine functions
1555 in each dimension follow. */
1556
1557 static conflict_function *
1558 conflict_fn (unsigned n, ...)
1559 {
1560 unsigned i;
1561 conflict_function *ret = XCNEW (conflict_function);
1562 va_list ap;
1563
1564 gcc_assert (0 < n && n <= MAX_DIM);
1565 va_start(ap, n);
1566
1567 ret->n = n;
1568 for (i = 0; i < n; i++)
1569 ret->fns[i] = va_arg (ap, affine_fn);
1570 va_end(ap);
1571
1572 return ret;
1573 }
1574
1575 /* Returns constant affine function with value CST. */
1576
1577 static affine_fn
1578 affine_fn_cst (tree cst)
1579 {
1580 affine_fn fn = VEC_alloc (tree, heap, 1);
1581 VEC_quick_push (tree, fn, cst);
1582 return fn;
1583 }
1584
1585 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1586
1587 static affine_fn
1588 affine_fn_univar (tree cst, unsigned dim, tree coef)
1589 {
1590 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1591 unsigned i;
1592
1593 gcc_assert (dim > 0);
1594 VEC_quick_push (tree, fn, cst);
1595 for (i = 1; i < dim; i++)
1596 VEC_quick_push (tree, fn, integer_zero_node);
1597 VEC_quick_push (tree, fn, coef);
1598 return fn;
1599 }
1600
1601 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1602 *OVERLAPS_B are initialized to the functions that describe the
1603 relation between the elements accessed twice by CHREC_A and
1604 CHREC_B. For k >= 0, the following property is verified:
1605
1606 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1607
1608 static void
1609 analyze_ziv_subscript (tree chrec_a,
1610 tree chrec_b,
1611 conflict_function **overlaps_a,
1612 conflict_function **overlaps_b,
1613 tree *last_conflicts)
1614 {
1615 tree type, difference;
1616 dependence_stats.num_ziv++;
1617
1618 if (dump_file && (dump_flags & TDF_DETAILS))
1619 fprintf (dump_file, "(analyze_ziv_subscript \n");
1620
1621 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1622 chrec_a = chrec_convert (type, chrec_a, NULL);
1623 chrec_b = chrec_convert (type, chrec_b, NULL);
1624 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1625
1626 switch (TREE_CODE (difference))
1627 {
1628 case INTEGER_CST:
1629 if (integer_zerop (difference))
1630 {
1631 /* The difference is equal to zero: the accessed index
1632 overlaps for each iteration in the loop. */
1633 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1634 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1635 *last_conflicts = chrec_dont_know;
1636 dependence_stats.num_ziv_dependent++;
1637 }
1638 else
1639 {
1640 /* The accesses do not overlap. */
1641 *overlaps_a = conflict_fn_no_dependence ();
1642 *overlaps_b = conflict_fn_no_dependence ();
1643 *last_conflicts = integer_zero_node;
1644 dependence_stats.num_ziv_independent++;
1645 }
1646 break;
1647
1648 default:
1649 /* We're not sure whether the indexes overlap. For the moment,
1650 conservatively answer "don't know". */
1651 if (dump_file && (dump_flags & TDF_DETAILS))
1652 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1653
1654 *overlaps_a = conflict_fn_not_known ();
1655 *overlaps_b = conflict_fn_not_known ();
1656 *last_conflicts = chrec_dont_know;
1657 dependence_stats.num_ziv_unimplemented++;
1658 break;
1659 }
1660
1661 if (dump_file && (dump_flags & TDF_DETAILS))
1662 fprintf (dump_file, ")\n");
1663 }
1664
1665 /* Sets NIT to the estimated number of executions of the statements in
1666 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1667 large as the number of iterations. If we have no reliable estimate,
1668 the function returns false, otherwise returns true. */
1669
1670 bool
1671 estimated_loop_iterations (struct loop *loop, bool conservative,
1672 double_int *nit)
1673 {
1674 estimate_numbers_of_iterations_loop (loop);
1675 if (conservative)
1676 {
1677 if (!loop->any_upper_bound)
1678 return false;
1679
1680 *nit = loop->nb_iterations_upper_bound;
1681 }
1682 else
1683 {
1684 if (!loop->any_estimate)
1685 return false;
1686
1687 *nit = loop->nb_iterations_estimate;
1688 }
1689
1690 return true;
1691 }
1692
1693 /* Similar to estimated_loop_iterations, but returns the estimate only
1694 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1695 on the number of iterations of LOOP could not be derived, returns -1. */
1696
1697 HOST_WIDE_INT
1698 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1699 {
1700 double_int nit;
1701 HOST_WIDE_INT hwi_nit;
1702
1703 if (!estimated_loop_iterations (loop, conservative, &nit))
1704 return -1;
1705
1706 if (!double_int_fits_in_shwi_p (nit))
1707 return -1;
1708 hwi_nit = double_int_to_shwi (nit);
1709
1710 return hwi_nit < 0 ? -1 : hwi_nit;
1711 }
1712
1713 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1714 and only if it fits to the int type. If this is not the case, or the
1715 estimate on the number of iterations of LOOP could not be derived, returns
1716 chrec_dont_know. */
1717
1718 static tree
1719 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1720 {
1721 double_int nit;
1722 tree type;
1723
1724 if (!estimated_loop_iterations (loop, conservative, &nit))
1725 return chrec_dont_know;
1726
1727 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1728 if (!double_int_fits_to_tree_p (type, nit))
1729 return chrec_dont_know;
1730
1731 return double_int_to_tree (type, nit);
1732 }
1733
1734 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1735 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1736 *OVERLAPS_B are initialized to the functions that describe the
1737 relation between the elements accessed twice by CHREC_A and
1738 CHREC_B. For k >= 0, the following property is verified:
1739
1740 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1741
1742 static void
1743 analyze_siv_subscript_cst_affine (tree chrec_a,
1744 tree chrec_b,
1745 conflict_function **overlaps_a,
1746 conflict_function **overlaps_b,
1747 tree *last_conflicts)
1748 {
1749 bool value0, value1, value2;
1750 tree type, difference, tmp;
1751
1752 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1753 chrec_a = chrec_convert (type, chrec_a, NULL);
1754 chrec_b = chrec_convert (type, chrec_b, NULL);
1755 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1756
1757 if (!chrec_is_positive (initial_condition (difference), &value0))
1758 {
1759 if (dump_file && (dump_flags & TDF_DETAILS))
1760 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1761
1762 dependence_stats.num_siv_unimplemented++;
1763 *overlaps_a = conflict_fn_not_known ();
1764 *overlaps_b = conflict_fn_not_known ();
1765 *last_conflicts = chrec_dont_know;
1766 return;
1767 }
1768 else
1769 {
1770 if (value0 == false)
1771 {
1772 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1773 {
1774 if (dump_file && (dump_flags & TDF_DETAILS))
1775 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1776
1777 *overlaps_a = conflict_fn_not_known ();
1778 *overlaps_b = conflict_fn_not_known ();
1779 *last_conflicts = chrec_dont_know;
1780 dependence_stats.num_siv_unimplemented++;
1781 return;
1782 }
1783 else
1784 {
1785 if (value1 == true)
1786 {
1787 /* Example:
1788 chrec_a = 12
1789 chrec_b = {10, +, 1}
1790 */
1791
1792 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1793 {
1794 HOST_WIDE_INT numiter;
1795 struct loop *loop = get_chrec_loop (chrec_b);
1796
1797 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1798 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1799 fold_build1 (ABS_EXPR, type, difference),
1800 CHREC_RIGHT (chrec_b));
1801 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1802 *last_conflicts = integer_one_node;
1803
1804
1805 /* Perform weak-zero siv test to see if overlap is
1806 outside the loop bounds. */
1807 numiter = estimated_loop_iterations_int (loop, false);
1808
1809 if (numiter >= 0
1810 && compare_tree_int (tmp, numiter) > 0)
1811 {
1812 free_conflict_function (*overlaps_a);
1813 free_conflict_function (*overlaps_b);
1814 *overlaps_a = conflict_fn_no_dependence ();
1815 *overlaps_b = conflict_fn_no_dependence ();
1816 *last_conflicts = integer_zero_node;
1817 dependence_stats.num_siv_independent++;
1818 return;
1819 }
1820 dependence_stats.num_siv_dependent++;
1821 return;
1822 }
1823
1824 /* When the step does not divide the difference, there are
1825 no overlaps. */
1826 else
1827 {
1828 *overlaps_a = conflict_fn_no_dependence ();
1829 *overlaps_b = conflict_fn_no_dependence ();
1830 *last_conflicts = integer_zero_node;
1831 dependence_stats.num_siv_independent++;
1832 return;
1833 }
1834 }
1835
1836 else
1837 {
1838 /* Example:
1839 chrec_a = 12
1840 chrec_b = {10, +, -1}
1841
1842 In this case, chrec_a will not overlap with chrec_b. */
1843 *overlaps_a = conflict_fn_no_dependence ();
1844 *overlaps_b = conflict_fn_no_dependence ();
1845 *last_conflicts = integer_zero_node;
1846 dependence_stats.num_siv_independent++;
1847 return;
1848 }
1849 }
1850 }
1851 else
1852 {
1853 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1854 {
1855 if (dump_file && (dump_flags & TDF_DETAILS))
1856 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1857
1858 *overlaps_a = conflict_fn_not_known ();
1859 *overlaps_b = conflict_fn_not_known ();
1860 *last_conflicts = chrec_dont_know;
1861 dependence_stats.num_siv_unimplemented++;
1862 return;
1863 }
1864 else
1865 {
1866 if (value2 == false)
1867 {
1868 /* Example:
1869 chrec_a = 3
1870 chrec_b = {10, +, -1}
1871 */
1872 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1873 {
1874 HOST_WIDE_INT numiter;
1875 struct loop *loop = get_chrec_loop (chrec_b);
1876
1877 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1878 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1879 CHREC_RIGHT (chrec_b));
1880 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1881 *last_conflicts = integer_one_node;
1882
1883 /* Perform weak-zero siv test to see if overlap is
1884 outside the loop bounds. */
1885 numiter = estimated_loop_iterations_int (loop, false);
1886
1887 if (numiter >= 0
1888 && compare_tree_int (tmp, numiter) > 0)
1889 {
1890 free_conflict_function (*overlaps_a);
1891 free_conflict_function (*overlaps_b);
1892 *overlaps_a = conflict_fn_no_dependence ();
1893 *overlaps_b = conflict_fn_no_dependence ();
1894 *last_conflicts = integer_zero_node;
1895 dependence_stats.num_siv_independent++;
1896 return;
1897 }
1898 dependence_stats.num_siv_dependent++;
1899 return;
1900 }
1901
1902 /* When the step does not divide the difference, there
1903 are no overlaps. */
1904 else
1905 {
1906 *overlaps_a = conflict_fn_no_dependence ();
1907 *overlaps_b = conflict_fn_no_dependence ();
1908 *last_conflicts = integer_zero_node;
1909 dependence_stats.num_siv_independent++;
1910 return;
1911 }
1912 }
1913 else
1914 {
1915 /* Example:
1916 chrec_a = 3
1917 chrec_b = {4, +, 1}
1918
1919 In this case, chrec_a will not overlap with chrec_b. */
1920 *overlaps_a = conflict_fn_no_dependence ();
1921 *overlaps_b = conflict_fn_no_dependence ();
1922 *last_conflicts = integer_zero_node;
1923 dependence_stats.num_siv_independent++;
1924 return;
1925 }
1926 }
1927 }
1928 }
1929 }
1930
1931 /* Helper recursive function for initializing the matrix A. Returns
1932 the initial value of CHREC. */
1933
1934 static tree
1935 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1936 {
1937 gcc_assert (chrec);
1938
1939 switch (TREE_CODE (chrec))
1940 {
1941 case POLYNOMIAL_CHREC:
1942 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1943
1944 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1945 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1946
1947 case PLUS_EXPR:
1948 case MULT_EXPR:
1949 case MINUS_EXPR:
1950 {
1951 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1952 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1953
1954 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1955 }
1956
1957 case NOP_EXPR:
1958 {
1959 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1960 return chrec_convert (chrec_type (chrec), op, NULL);
1961 }
1962
1963 case BIT_NOT_EXPR:
1964 {
1965 /* Handle ~X as -1 - X. */
1966 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1967 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1968 build_int_cst (TREE_TYPE (chrec), -1), op);
1969 }
1970
1971 case INTEGER_CST:
1972 return chrec;
1973
1974 default:
1975 gcc_unreachable ();
1976 return NULL_TREE;
1977 }
1978 }
1979
1980 #define FLOOR_DIV(x,y) ((x) / (y))
1981
1982 /* Solves the special case of the Diophantine equation:
1983 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1984
1985 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1986 number of iterations that loops X and Y run. The overlaps will be
1987 constructed as evolutions in dimension DIM. */
1988
1989 static void
1990 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1991 affine_fn *overlaps_a,
1992 affine_fn *overlaps_b,
1993 tree *last_conflicts, int dim)
1994 {
1995 if (((step_a > 0 && step_b > 0)
1996 || (step_a < 0 && step_b < 0)))
1997 {
1998 int step_overlaps_a, step_overlaps_b;
1999 int gcd_steps_a_b, last_conflict, tau2;
2000
2001 gcd_steps_a_b = gcd (step_a, step_b);
2002 step_overlaps_a = step_b / gcd_steps_a_b;
2003 step_overlaps_b = step_a / gcd_steps_a_b;
2004
2005 if (niter > 0)
2006 {
2007 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2008 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2009 last_conflict = tau2;
2010 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2011 }
2012 else
2013 *last_conflicts = chrec_dont_know;
2014
2015 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2016 build_int_cst (NULL_TREE,
2017 step_overlaps_a));
2018 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2019 build_int_cst (NULL_TREE,
2020 step_overlaps_b));
2021 }
2022
2023 else
2024 {
2025 *overlaps_a = affine_fn_cst (integer_zero_node);
2026 *overlaps_b = affine_fn_cst (integer_zero_node);
2027 *last_conflicts = integer_zero_node;
2028 }
2029 }
2030
2031 /* Solves the special case of a Diophantine equation where CHREC_A is
2032 an affine bivariate function, and CHREC_B is an affine univariate
2033 function. For example,
2034
2035 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2036
2037 has the following overlapping functions:
2038
2039 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2040 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2041 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2042
2043 FORNOW: This is a specialized implementation for a case occurring in
2044 a common benchmark. Implement the general algorithm. */
2045
2046 static void
2047 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2048 conflict_function **overlaps_a,
2049 conflict_function **overlaps_b,
2050 tree *last_conflicts)
2051 {
2052 bool xz_p, yz_p, xyz_p;
2053 int step_x, step_y, step_z;
2054 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2055 affine_fn overlaps_a_xz, overlaps_b_xz;
2056 affine_fn overlaps_a_yz, overlaps_b_yz;
2057 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2058 affine_fn ova1, ova2, ovb;
2059 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2060
2061 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2062 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2063 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2064
2065 niter_x =
2066 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
2067 false);
2068 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
2069 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
2070
2071 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2072 {
2073 if (dump_file && (dump_flags & TDF_DETAILS))
2074 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2075
2076 *overlaps_a = conflict_fn_not_known ();
2077 *overlaps_b = conflict_fn_not_known ();
2078 *last_conflicts = chrec_dont_know;
2079 return;
2080 }
2081
2082 niter = MIN (niter_x, niter_z);
2083 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2084 &overlaps_a_xz,
2085 &overlaps_b_xz,
2086 &last_conflicts_xz, 1);
2087 niter = MIN (niter_y, niter_z);
2088 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2089 &overlaps_a_yz,
2090 &overlaps_b_yz,
2091 &last_conflicts_yz, 2);
2092 niter = MIN (niter_x, niter_z);
2093 niter = MIN (niter_y, niter);
2094 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2095 &overlaps_a_xyz,
2096 &overlaps_b_xyz,
2097 &last_conflicts_xyz, 3);
2098
2099 xz_p = !integer_zerop (last_conflicts_xz);
2100 yz_p = !integer_zerop (last_conflicts_yz);
2101 xyz_p = !integer_zerop (last_conflicts_xyz);
2102
2103 if (xz_p || yz_p || xyz_p)
2104 {
2105 ova1 = affine_fn_cst (integer_zero_node);
2106 ova2 = affine_fn_cst (integer_zero_node);
2107 ovb = affine_fn_cst (integer_zero_node);
2108 if (xz_p)
2109 {
2110 affine_fn t0 = ova1;
2111 affine_fn t2 = ovb;
2112
2113 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2114 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2115 affine_fn_free (t0);
2116 affine_fn_free (t2);
2117 *last_conflicts = last_conflicts_xz;
2118 }
2119 if (yz_p)
2120 {
2121 affine_fn t0 = ova2;
2122 affine_fn t2 = ovb;
2123
2124 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2125 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2126 affine_fn_free (t0);
2127 affine_fn_free (t2);
2128 *last_conflicts = last_conflicts_yz;
2129 }
2130 if (xyz_p)
2131 {
2132 affine_fn t0 = ova1;
2133 affine_fn t2 = ova2;
2134 affine_fn t4 = ovb;
2135
2136 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2137 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2138 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2139 affine_fn_free (t0);
2140 affine_fn_free (t2);
2141 affine_fn_free (t4);
2142 *last_conflicts = last_conflicts_xyz;
2143 }
2144 *overlaps_a = conflict_fn (2, ova1, ova2);
2145 *overlaps_b = conflict_fn (1, ovb);
2146 }
2147 else
2148 {
2149 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2150 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2151 *last_conflicts = integer_zero_node;
2152 }
2153
2154 affine_fn_free (overlaps_a_xz);
2155 affine_fn_free (overlaps_b_xz);
2156 affine_fn_free (overlaps_a_yz);
2157 affine_fn_free (overlaps_b_yz);
2158 affine_fn_free (overlaps_a_xyz);
2159 affine_fn_free (overlaps_b_xyz);
2160 }
2161
2162 /* Determines the overlapping elements due to accesses CHREC_A and
2163 CHREC_B, that are affine functions. This function cannot handle
2164 symbolic evolution functions, ie. when initial conditions are
2165 parameters, because it uses lambda matrices of integers. */
2166
2167 static void
2168 analyze_subscript_affine_affine (tree chrec_a,
2169 tree chrec_b,
2170 conflict_function **overlaps_a,
2171 conflict_function **overlaps_b,
2172 tree *last_conflicts)
2173 {
2174 unsigned nb_vars_a, nb_vars_b, dim;
2175 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2176 lambda_matrix A, U, S;
2177 struct obstack scratch_obstack;
2178
2179 if (eq_evolutions_p (chrec_a, chrec_b))
2180 {
2181 /* The accessed index overlaps for each iteration in the
2182 loop. */
2183 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2184 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2185 *last_conflicts = chrec_dont_know;
2186 return;
2187 }
2188 if (dump_file && (dump_flags & TDF_DETAILS))
2189 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2190
2191 /* For determining the initial intersection, we have to solve a
2192 Diophantine equation. This is the most time consuming part.
2193
2194 For answering to the question: "Is there a dependence?" we have
2195 to prove that there exists a solution to the Diophantine
2196 equation, and that the solution is in the iteration domain,
2197 i.e. the solution is positive or zero, and that the solution
2198 happens before the upper bound loop.nb_iterations. Otherwise
2199 there is no dependence. This function outputs a description of
2200 the iterations that hold the intersections. */
2201
2202 nb_vars_a = nb_vars_in_chrec (chrec_a);
2203 nb_vars_b = nb_vars_in_chrec (chrec_b);
2204
2205 gcc_obstack_init (&scratch_obstack);
2206
2207 dim = nb_vars_a + nb_vars_b;
2208 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2209 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2210 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2211
2212 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2213 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2214 gamma = init_b - init_a;
2215
2216 /* Don't do all the hard work of solving the Diophantine equation
2217 when we already know the solution: for example,
2218 | {3, +, 1}_1
2219 | {3, +, 4}_2
2220 | gamma = 3 - 3 = 0.
2221 Then the first overlap occurs during the first iterations:
2222 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2223 */
2224 if (gamma == 0)
2225 {
2226 if (nb_vars_a == 1 && nb_vars_b == 1)
2227 {
2228 HOST_WIDE_INT step_a, step_b;
2229 HOST_WIDE_INT niter, niter_a, niter_b;
2230 affine_fn ova, ovb;
2231
2232 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2233 false);
2234 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2235 false);
2236 niter = MIN (niter_a, niter_b);
2237 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2238 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2239
2240 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2241 &ova, &ovb,
2242 last_conflicts, 1);
2243 *overlaps_a = conflict_fn (1, ova);
2244 *overlaps_b = conflict_fn (1, ovb);
2245 }
2246
2247 else if (nb_vars_a == 2 && nb_vars_b == 1)
2248 compute_overlap_steps_for_affine_1_2
2249 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2250
2251 else if (nb_vars_a == 1 && nb_vars_b == 2)
2252 compute_overlap_steps_for_affine_1_2
2253 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2254
2255 else
2256 {
2257 if (dump_file && (dump_flags & TDF_DETAILS))
2258 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2259 *overlaps_a = conflict_fn_not_known ();
2260 *overlaps_b = conflict_fn_not_known ();
2261 *last_conflicts = chrec_dont_know;
2262 }
2263 goto end_analyze_subs_aa;
2264 }
2265
2266 /* U.A = S */
2267 lambda_matrix_right_hermite (A, dim, 1, S, U);
2268
2269 if (S[0][0] < 0)
2270 {
2271 S[0][0] *= -1;
2272 lambda_matrix_row_negate (U, dim, 0);
2273 }
2274 gcd_alpha_beta = S[0][0];
2275
2276 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2277 but that is a quite strange case. Instead of ICEing, answer
2278 don't know. */
2279 if (gcd_alpha_beta == 0)
2280 {
2281 *overlaps_a = conflict_fn_not_known ();
2282 *overlaps_b = conflict_fn_not_known ();
2283 *last_conflicts = chrec_dont_know;
2284 goto end_analyze_subs_aa;
2285 }
2286
2287 /* The classic "gcd-test". */
2288 if (!int_divides_p (gcd_alpha_beta, gamma))
2289 {
2290 /* The "gcd-test" has determined that there is no integer
2291 solution, i.e. there is no dependence. */
2292 *overlaps_a = conflict_fn_no_dependence ();
2293 *overlaps_b = conflict_fn_no_dependence ();
2294 *last_conflicts = integer_zero_node;
2295 }
2296
2297 /* Both access functions are univariate. This includes SIV and MIV cases. */
2298 else if (nb_vars_a == 1 && nb_vars_b == 1)
2299 {
2300 /* Both functions should have the same evolution sign. */
2301 if (((A[0][0] > 0 && -A[1][0] > 0)
2302 || (A[0][0] < 0 && -A[1][0] < 0)))
2303 {
2304 /* The solutions are given by:
2305 |
2306 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2307 | [u21 u22] [y0]
2308
2309 For a given integer t. Using the following variables,
2310
2311 | i0 = u11 * gamma / gcd_alpha_beta
2312 | j0 = u12 * gamma / gcd_alpha_beta
2313 | i1 = u21
2314 | j1 = u22
2315
2316 the solutions are:
2317
2318 | x0 = i0 + i1 * t,
2319 | y0 = j0 + j1 * t. */
2320 HOST_WIDE_INT i0, j0, i1, j1;
2321
2322 i0 = U[0][0] * gamma / gcd_alpha_beta;
2323 j0 = U[0][1] * gamma / gcd_alpha_beta;
2324 i1 = U[1][0];
2325 j1 = U[1][1];
2326
2327 if ((i1 == 0 && i0 < 0)
2328 || (j1 == 0 && j0 < 0))
2329 {
2330 /* There is no solution.
2331 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2332 falls in here, but for the moment we don't look at the
2333 upper bound of the iteration domain. */
2334 *overlaps_a = conflict_fn_no_dependence ();
2335 *overlaps_b = conflict_fn_no_dependence ();
2336 *last_conflicts = integer_zero_node;
2337 goto end_analyze_subs_aa;
2338 }
2339
2340 if (i1 > 0 && j1 > 0)
2341 {
2342 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2343 (get_chrec_loop (chrec_a), false);
2344 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2345 (get_chrec_loop (chrec_b), false);
2346 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2347
2348 /* (X0, Y0) is a solution of the Diophantine equation:
2349 "chrec_a (X0) = chrec_b (Y0)". */
2350 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2351 CEIL (-j0, j1));
2352 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2353 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2354
2355 /* (X1, Y1) is the smallest positive solution of the eq
2356 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2357 first conflict occurs. */
2358 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2359 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2360 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2361
2362 if (niter > 0)
2363 {
2364 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2365 FLOOR_DIV (niter - j0, j1));
2366 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2367
2368 /* If the overlap occurs outside of the bounds of the
2369 loop, there is no dependence. */
2370 if (x1 >= niter || y1 >= niter)
2371 {
2372 *overlaps_a = conflict_fn_no_dependence ();
2373 *overlaps_b = conflict_fn_no_dependence ();
2374 *last_conflicts = integer_zero_node;
2375 goto end_analyze_subs_aa;
2376 }
2377 else
2378 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2379 }
2380 else
2381 *last_conflicts = chrec_dont_know;
2382
2383 *overlaps_a
2384 = conflict_fn (1,
2385 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2386 1,
2387 build_int_cst (NULL_TREE, i1)));
2388 *overlaps_b
2389 = conflict_fn (1,
2390 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2391 1,
2392 build_int_cst (NULL_TREE, j1)));
2393 }
2394 else
2395 {
2396 /* FIXME: For the moment, the upper bound of the
2397 iteration domain for i and j is not checked. */
2398 if (dump_file && (dump_flags & TDF_DETAILS))
2399 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2400 *overlaps_a = conflict_fn_not_known ();
2401 *overlaps_b = conflict_fn_not_known ();
2402 *last_conflicts = chrec_dont_know;
2403 }
2404 }
2405 else
2406 {
2407 if (dump_file && (dump_flags & TDF_DETAILS))
2408 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2409 *overlaps_a = conflict_fn_not_known ();
2410 *overlaps_b = conflict_fn_not_known ();
2411 *last_conflicts = chrec_dont_know;
2412 }
2413 }
2414 else
2415 {
2416 if (dump_file && (dump_flags & TDF_DETAILS))
2417 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2418 *overlaps_a = conflict_fn_not_known ();
2419 *overlaps_b = conflict_fn_not_known ();
2420 *last_conflicts = chrec_dont_know;
2421 }
2422
2423 end_analyze_subs_aa:
2424 obstack_free (&scratch_obstack, NULL);
2425 if (dump_file && (dump_flags & TDF_DETAILS))
2426 {
2427 fprintf (dump_file, " (overlaps_a = ");
2428 dump_conflict_function (dump_file, *overlaps_a);
2429 fprintf (dump_file, ")\n (overlaps_b = ");
2430 dump_conflict_function (dump_file, *overlaps_b);
2431 fprintf (dump_file, ")\n");
2432 fprintf (dump_file, ")\n");
2433 }
2434 }
2435
2436 /* Returns true when analyze_subscript_affine_affine can be used for
2437 determining the dependence relation between chrec_a and chrec_b,
2438 that contain symbols. This function modifies chrec_a and chrec_b
2439 such that the analysis result is the same, and such that they don't
2440 contain symbols, and then can safely be passed to the analyzer.
2441
2442 Example: The analysis of the following tuples of evolutions produce
2443 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2444 vs. {0, +, 1}_1
2445
2446 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2447 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2448 */
2449
2450 static bool
2451 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2452 {
2453 tree diff, type, left_a, left_b, right_b;
2454
2455 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2456 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2457 /* FIXME: For the moment not handled. Might be refined later. */
2458 return false;
2459
2460 type = chrec_type (*chrec_a);
2461 left_a = CHREC_LEFT (*chrec_a);
2462 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2463 diff = chrec_fold_minus (type, left_a, left_b);
2464
2465 if (!evolution_function_is_constant_p (diff))
2466 return false;
2467
2468 if (dump_file && (dump_flags & TDF_DETAILS))
2469 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2470
2471 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2472 diff, CHREC_RIGHT (*chrec_a));
2473 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2474 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2475 build_int_cst (type, 0),
2476 right_b);
2477 return true;
2478 }
2479
2480 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2481 *OVERLAPS_B are initialized to the functions that describe the
2482 relation between the elements accessed twice by CHREC_A and
2483 CHREC_B. For k >= 0, the following property is verified:
2484
2485 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2486
2487 static void
2488 analyze_siv_subscript (tree chrec_a,
2489 tree chrec_b,
2490 conflict_function **overlaps_a,
2491 conflict_function **overlaps_b,
2492 tree *last_conflicts,
2493 int loop_nest_num)
2494 {
2495 dependence_stats.num_siv++;
2496
2497 if (dump_file && (dump_flags & TDF_DETAILS))
2498 fprintf (dump_file, "(analyze_siv_subscript \n");
2499
2500 if (evolution_function_is_constant_p (chrec_a)
2501 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2502 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2503 overlaps_a, overlaps_b, last_conflicts);
2504
2505 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2506 && evolution_function_is_constant_p (chrec_b))
2507 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2508 overlaps_b, overlaps_a, last_conflicts);
2509
2510 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2511 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2512 {
2513 if (!chrec_contains_symbols (chrec_a)
2514 && !chrec_contains_symbols (chrec_b))
2515 {
2516 analyze_subscript_affine_affine (chrec_a, chrec_b,
2517 overlaps_a, overlaps_b,
2518 last_conflicts);
2519
2520 if (CF_NOT_KNOWN_P (*overlaps_a)
2521 || CF_NOT_KNOWN_P (*overlaps_b))
2522 dependence_stats.num_siv_unimplemented++;
2523 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2524 || CF_NO_DEPENDENCE_P (*overlaps_b))
2525 dependence_stats.num_siv_independent++;
2526 else
2527 dependence_stats.num_siv_dependent++;
2528 }
2529 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2530 &chrec_b))
2531 {
2532 analyze_subscript_affine_affine (chrec_a, chrec_b,
2533 overlaps_a, overlaps_b,
2534 last_conflicts);
2535
2536 if (CF_NOT_KNOWN_P (*overlaps_a)
2537 || CF_NOT_KNOWN_P (*overlaps_b))
2538 dependence_stats.num_siv_unimplemented++;
2539 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2540 || CF_NO_DEPENDENCE_P (*overlaps_b))
2541 dependence_stats.num_siv_independent++;
2542 else
2543 dependence_stats.num_siv_dependent++;
2544 }
2545 else
2546 goto siv_subscript_dontknow;
2547 }
2548
2549 else
2550 {
2551 siv_subscript_dontknow:;
2552 if (dump_file && (dump_flags & TDF_DETAILS))
2553 fprintf (dump_file, "siv test failed: unimplemented.\n");
2554 *overlaps_a = conflict_fn_not_known ();
2555 *overlaps_b = conflict_fn_not_known ();
2556 *last_conflicts = chrec_dont_know;
2557 dependence_stats.num_siv_unimplemented++;
2558 }
2559
2560 if (dump_file && (dump_flags & TDF_DETAILS))
2561 fprintf (dump_file, ")\n");
2562 }
2563
2564 /* Returns false if we can prove that the greatest common divisor of the steps
2565 of CHREC does not divide CST, false otherwise. */
2566
2567 static bool
2568 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2569 {
2570 HOST_WIDE_INT cd = 0, val;
2571 tree step;
2572
2573 if (!host_integerp (cst, 0))
2574 return true;
2575 val = tree_low_cst (cst, 0);
2576
2577 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2578 {
2579 step = CHREC_RIGHT (chrec);
2580 if (!host_integerp (step, 0))
2581 return true;
2582 cd = gcd (cd, tree_low_cst (step, 0));
2583 chrec = CHREC_LEFT (chrec);
2584 }
2585
2586 return val % cd == 0;
2587 }
2588
2589 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2590 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2591 functions that describe the relation between the elements accessed
2592 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2593 is verified:
2594
2595 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2596
2597 static void
2598 analyze_miv_subscript (tree chrec_a,
2599 tree chrec_b,
2600 conflict_function **overlaps_a,
2601 conflict_function **overlaps_b,
2602 tree *last_conflicts,
2603 struct loop *loop_nest)
2604 {
2605 /* FIXME: This is a MIV subscript, not yet handled.
2606 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2607 (A[i] vs. A[j]).
2608
2609 In the SIV test we had to solve a Diophantine equation with two
2610 variables. In the MIV case we have to solve a Diophantine
2611 equation with 2*n variables (if the subscript uses n IVs).
2612 */
2613 tree type, difference;
2614
2615 dependence_stats.num_miv++;
2616 if (dump_file && (dump_flags & TDF_DETAILS))
2617 fprintf (dump_file, "(analyze_miv_subscript \n");
2618
2619 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2620 chrec_a = chrec_convert (type, chrec_a, NULL);
2621 chrec_b = chrec_convert (type, chrec_b, NULL);
2622 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2623
2624 if (eq_evolutions_p (chrec_a, chrec_b))
2625 {
2626 /* Access functions are the same: all the elements are accessed
2627 in the same order. */
2628 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2629 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2630 *last_conflicts = estimated_loop_iterations_tree
2631 (get_chrec_loop (chrec_a), true);
2632 dependence_stats.num_miv_dependent++;
2633 }
2634
2635 else if (evolution_function_is_constant_p (difference)
2636 /* For the moment, the following is verified:
2637 evolution_function_is_affine_multivariate_p (chrec_a,
2638 loop_nest->num) */
2639 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2640 {
2641 /* testsuite/.../ssa-chrec-33.c
2642 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2643
2644 The difference is 1, and all the evolution steps are multiples
2645 of 2, consequently there are no overlapping elements. */
2646 *overlaps_a = conflict_fn_no_dependence ();
2647 *overlaps_b = conflict_fn_no_dependence ();
2648 *last_conflicts = integer_zero_node;
2649 dependence_stats.num_miv_independent++;
2650 }
2651
2652 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2653 && !chrec_contains_symbols (chrec_a)
2654 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2655 && !chrec_contains_symbols (chrec_b))
2656 {
2657 /* testsuite/.../ssa-chrec-35.c
2658 {0, +, 1}_2 vs. {0, +, 1}_3
2659 the overlapping elements are respectively located at iterations:
2660 {0, +, 1}_x and {0, +, 1}_x,
2661 in other words, we have the equality:
2662 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2663
2664 Other examples:
2665 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2666 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2667
2668 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2669 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2670 */
2671 analyze_subscript_affine_affine (chrec_a, chrec_b,
2672 overlaps_a, overlaps_b, last_conflicts);
2673
2674 if (CF_NOT_KNOWN_P (*overlaps_a)
2675 || CF_NOT_KNOWN_P (*overlaps_b))
2676 dependence_stats.num_miv_unimplemented++;
2677 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2678 || CF_NO_DEPENDENCE_P (*overlaps_b))
2679 dependence_stats.num_miv_independent++;
2680 else
2681 dependence_stats.num_miv_dependent++;
2682 }
2683
2684 else
2685 {
2686 /* When the analysis is too difficult, answer "don't know". */
2687 if (dump_file && (dump_flags & TDF_DETAILS))
2688 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2689
2690 *overlaps_a = conflict_fn_not_known ();
2691 *overlaps_b = conflict_fn_not_known ();
2692 *last_conflicts = chrec_dont_know;
2693 dependence_stats.num_miv_unimplemented++;
2694 }
2695
2696 if (dump_file && (dump_flags & TDF_DETAILS))
2697 fprintf (dump_file, ")\n");
2698 }
2699
2700 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2701 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2702 OVERLAP_ITERATIONS_B are initialized with two functions that
2703 describe the iterations that contain conflicting elements.
2704
2705 Remark: For an integer k >= 0, the following equality is true:
2706
2707 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2708 */
2709
2710 static void
2711 analyze_overlapping_iterations (tree chrec_a,
2712 tree chrec_b,
2713 conflict_function **overlap_iterations_a,
2714 conflict_function **overlap_iterations_b,
2715 tree *last_conflicts, struct loop *loop_nest)
2716 {
2717 unsigned int lnn = loop_nest->num;
2718
2719 dependence_stats.num_subscript_tests++;
2720
2721 if (dump_file && (dump_flags & TDF_DETAILS))
2722 {
2723 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2724 fprintf (dump_file, " (chrec_a = ");
2725 print_generic_expr (dump_file, chrec_a, 0);
2726 fprintf (dump_file, ")\n (chrec_b = ");
2727 print_generic_expr (dump_file, chrec_b, 0);
2728 fprintf (dump_file, ")\n");
2729 }
2730
2731 if (chrec_a == NULL_TREE
2732 || chrec_b == NULL_TREE
2733 || chrec_contains_undetermined (chrec_a)
2734 || chrec_contains_undetermined (chrec_b))
2735 {
2736 dependence_stats.num_subscript_undetermined++;
2737
2738 *overlap_iterations_a = conflict_fn_not_known ();
2739 *overlap_iterations_b = conflict_fn_not_known ();
2740 }
2741
2742 /* If they are the same chrec, and are affine, they overlap
2743 on every iteration. */
2744 else if (eq_evolutions_p (chrec_a, chrec_b)
2745 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2746 {
2747 dependence_stats.num_same_subscript_function++;
2748 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2749 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2750 *last_conflicts = chrec_dont_know;
2751 }
2752
2753 /* If they aren't the same, and aren't affine, we can't do anything
2754 yet. */
2755 else if ((chrec_contains_symbols (chrec_a)
2756 || chrec_contains_symbols (chrec_b))
2757 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2758 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2759 {
2760 dependence_stats.num_subscript_undetermined++;
2761 *overlap_iterations_a = conflict_fn_not_known ();
2762 *overlap_iterations_b = conflict_fn_not_known ();
2763 }
2764
2765 else if (ziv_subscript_p (chrec_a, chrec_b))
2766 analyze_ziv_subscript (chrec_a, chrec_b,
2767 overlap_iterations_a, overlap_iterations_b,
2768 last_conflicts);
2769
2770 else if (siv_subscript_p (chrec_a, chrec_b))
2771 analyze_siv_subscript (chrec_a, chrec_b,
2772 overlap_iterations_a, overlap_iterations_b,
2773 last_conflicts, lnn);
2774
2775 else
2776 analyze_miv_subscript (chrec_a, chrec_b,
2777 overlap_iterations_a, overlap_iterations_b,
2778 last_conflicts, loop_nest);
2779
2780 if (dump_file && (dump_flags & TDF_DETAILS))
2781 {
2782 fprintf (dump_file, " (overlap_iterations_a = ");
2783 dump_conflict_function (dump_file, *overlap_iterations_a);
2784 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2785 dump_conflict_function (dump_file, *overlap_iterations_b);
2786 fprintf (dump_file, ")\n");
2787 fprintf (dump_file, ")\n");
2788 }
2789 }
2790
2791 /* Helper function for uniquely inserting distance vectors. */
2792
2793 static void
2794 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2795 {
2796 unsigned i;
2797 lambda_vector v;
2798
2799 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2800 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2801 return;
2802
2803 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2804 }
2805
2806 /* Helper function for uniquely inserting direction vectors. */
2807
2808 static void
2809 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2810 {
2811 unsigned i;
2812 lambda_vector v;
2813
2814 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2815 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2816 return;
2817
2818 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2819 }
2820
2821 /* Add a distance of 1 on all the loops outer than INDEX. If we
2822 haven't yet determined a distance for this outer loop, push a new
2823 distance vector composed of the previous distance, and a distance
2824 of 1 for this outer loop. Example:
2825
2826 | loop_1
2827 | loop_2
2828 | A[10]
2829 | endloop_2
2830 | endloop_1
2831
2832 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2833 save (0, 1), then we have to save (1, 0). */
2834
2835 static void
2836 add_outer_distances (struct data_dependence_relation *ddr,
2837 lambda_vector dist_v, int index)
2838 {
2839 /* For each outer loop where init_v is not set, the accesses are
2840 in dependence of distance 1 in the loop. */
2841 while (--index >= 0)
2842 {
2843 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2844 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2845 save_v[index] = 1;
2846 save_dist_v (ddr, save_v);
2847 }
2848 }
2849
2850 /* Return false when fail to represent the data dependence as a
2851 distance vector. INIT_B is set to true when a component has been
2852 added to the distance vector DIST_V. INDEX_CARRY is then set to
2853 the index in DIST_V that carries the dependence. */
2854
2855 static bool
2856 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2857 struct data_reference *ddr_a,
2858 struct data_reference *ddr_b,
2859 lambda_vector dist_v, bool *init_b,
2860 int *index_carry)
2861 {
2862 unsigned i;
2863 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2864
2865 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2866 {
2867 tree access_fn_a, access_fn_b;
2868 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2869
2870 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2871 {
2872 non_affine_dependence_relation (ddr);
2873 return false;
2874 }
2875
2876 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2877 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2878
2879 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2880 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2881 {
2882 int dist, index;
2883 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2884 DDR_LOOP_NEST (ddr));
2885 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2886 DDR_LOOP_NEST (ddr));
2887
2888 /* The dependence is carried by the outermost loop. Example:
2889 | loop_1
2890 | A[{4, +, 1}_1]
2891 | loop_2
2892 | A[{5, +, 1}_2]
2893 | endloop_2
2894 | endloop_1
2895 In this case, the dependence is carried by loop_1. */
2896 index = index_a < index_b ? index_a : index_b;
2897 *index_carry = MIN (index, *index_carry);
2898
2899 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2900 {
2901 non_affine_dependence_relation (ddr);
2902 return false;
2903 }
2904
2905 dist = int_cst_value (SUB_DISTANCE (subscript));
2906
2907 /* This is the subscript coupling test. If we have already
2908 recorded a distance for this loop (a distance coming from
2909 another subscript), it should be the same. For example,
2910 in the following code, there is no dependence:
2911
2912 | loop i = 0, N, 1
2913 | T[i+1][i] = ...
2914 | ... = T[i][i]
2915 | endloop
2916 */
2917 if (init_v[index] != 0 && dist_v[index] != dist)
2918 {
2919 finalize_ddr_dependent (ddr, chrec_known);
2920 return false;
2921 }
2922
2923 dist_v[index] = dist;
2924 init_v[index] = 1;
2925 *init_b = true;
2926 }
2927 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2928 {
2929 /* This can be for example an affine vs. constant dependence
2930 (T[i] vs. T[3]) that is not an affine dependence and is
2931 not representable as a distance vector. */
2932 non_affine_dependence_relation (ddr);
2933 return false;
2934 }
2935 }
2936
2937 return true;
2938 }
2939
2940 /* Return true when the DDR contains only constant access functions. */
2941
2942 static bool
2943 constant_access_functions (const struct data_dependence_relation *ddr)
2944 {
2945 unsigned i;
2946
2947 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2948 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2949 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2950 return false;
2951
2952 return true;
2953 }
2954
2955 /* Helper function for the case where DDR_A and DDR_B are the same
2956 multivariate access function with a constant step. For an example
2957 see pr34635-1.c. */
2958
2959 static void
2960 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2961 {
2962 int x_1, x_2;
2963 tree c_1 = CHREC_LEFT (c_2);
2964 tree c_0 = CHREC_LEFT (c_1);
2965 lambda_vector dist_v;
2966 int v1, v2, cd;
2967
2968 /* Polynomials with more than 2 variables are not handled yet. When
2969 the evolution steps are parameters, it is not possible to
2970 represent the dependence using classical distance vectors. */
2971 if (TREE_CODE (c_0) != INTEGER_CST
2972 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2973 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2974 {
2975 DDR_AFFINE_P (ddr) = false;
2976 return;
2977 }
2978
2979 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2980 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2981
2982 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2983 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2984 v1 = int_cst_value (CHREC_RIGHT (c_1));
2985 v2 = int_cst_value (CHREC_RIGHT (c_2));
2986 cd = gcd (v1, v2);
2987 v1 /= cd;
2988 v2 /= cd;
2989
2990 if (v2 < 0)
2991 {
2992 v2 = -v2;
2993 v1 = -v1;
2994 }
2995
2996 dist_v[x_1] = v2;
2997 dist_v[x_2] = -v1;
2998 save_dist_v (ddr, dist_v);
2999
3000 add_outer_distances (ddr, dist_v, x_1);
3001 }
3002
3003 /* Helper function for the case where DDR_A and DDR_B are the same
3004 access functions. */
3005
3006 static void
3007 add_other_self_distances (struct data_dependence_relation *ddr)
3008 {
3009 lambda_vector dist_v;
3010 unsigned i;
3011 int index_carry = DDR_NB_LOOPS (ddr);
3012
3013 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3014 {
3015 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3016
3017 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3018 {
3019 if (!evolution_function_is_univariate_p (access_fun))
3020 {
3021 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3022 {
3023 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3024 return;
3025 }
3026
3027 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3028
3029 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3030 add_multivariate_self_dist (ddr, access_fun);
3031 else
3032 /* The evolution step is not constant: it varies in
3033 the outer loop, so this cannot be represented by a
3034 distance vector. For example in pr34635.c the
3035 evolution is {0, +, {0, +, 4}_1}_2. */
3036 DDR_AFFINE_P (ddr) = false;
3037
3038 return;
3039 }
3040
3041 index_carry = MIN (index_carry,
3042 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3043 DDR_LOOP_NEST (ddr)));
3044 }
3045 }
3046
3047 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3048 add_outer_distances (ddr, dist_v, index_carry);
3049 }
3050
3051 static void
3052 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3053 {
3054 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3055
3056 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3057 save_dist_v (ddr, dist_v);
3058 }
3059
3060 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3061 is the case for example when access functions are the same and
3062 equal to a constant, as in:
3063
3064 | loop_1
3065 | A[3] = ...
3066 | ... = A[3]
3067 | endloop_1
3068
3069 in which case the distance vectors are (0) and (1). */
3070
3071 static void
3072 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3073 {
3074 unsigned i, j;
3075
3076 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3077 {
3078 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3079 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3080 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3081
3082 for (j = 0; j < ca->n; j++)
3083 if (affine_function_zero_p (ca->fns[j]))
3084 {
3085 insert_innermost_unit_dist_vector (ddr);
3086 return;
3087 }
3088
3089 for (j = 0; j < cb->n; j++)
3090 if (affine_function_zero_p (cb->fns[j]))
3091 {
3092 insert_innermost_unit_dist_vector (ddr);
3093 return;
3094 }
3095 }
3096 }
3097
3098 /* Compute the classic per loop distance vector. DDR is the data
3099 dependence relation to build a vector from. Return false when fail
3100 to represent the data dependence as a distance vector. */
3101
3102 static bool
3103 build_classic_dist_vector (struct data_dependence_relation *ddr,
3104 struct loop *loop_nest)
3105 {
3106 bool init_b = false;
3107 int index_carry = DDR_NB_LOOPS (ddr);
3108 lambda_vector dist_v;
3109
3110 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3111 return false;
3112
3113 if (same_access_functions (ddr))
3114 {
3115 /* Save the 0 vector. */
3116 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3117 save_dist_v (ddr, dist_v);
3118
3119 if (constant_access_functions (ddr))
3120 add_distance_for_zero_overlaps (ddr);
3121
3122 if (DDR_NB_LOOPS (ddr) > 1)
3123 add_other_self_distances (ddr);
3124
3125 return true;
3126 }
3127
3128 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3129 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3130 dist_v, &init_b, &index_carry))
3131 return false;
3132
3133 /* Save the distance vector if we initialized one. */
3134 if (init_b)
3135 {
3136 /* Verify a basic constraint: classic distance vectors should
3137 always be lexicographically positive.
3138
3139 Data references are collected in the order of execution of
3140 the program, thus for the following loop
3141
3142 | for (i = 1; i < 100; i++)
3143 | for (j = 1; j < 100; j++)
3144 | {
3145 | t = T[j+1][i-1]; // A
3146 | T[j][i] = t + 2; // B
3147 | }
3148
3149 references are collected following the direction of the wind:
3150 A then B. The data dependence tests are performed also
3151 following this order, such that we're looking at the distance
3152 separating the elements accessed by A from the elements later
3153 accessed by B. But in this example, the distance returned by
3154 test_dep (A, B) is lexicographically negative (-1, 1), that
3155 means that the access A occurs later than B with respect to
3156 the outer loop, ie. we're actually looking upwind. In this
3157 case we solve test_dep (B, A) looking downwind to the
3158 lexicographically positive solution, that returns the
3159 distance vector (1, -1). */
3160 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3161 {
3162 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3163 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3164 loop_nest))
3165 return false;
3166 compute_subscript_distance (ddr);
3167 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3168 save_v, &init_b, &index_carry))
3169 return false;
3170 save_dist_v (ddr, save_v);
3171 DDR_REVERSED_P (ddr) = true;
3172
3173 /* In this case there is a dependence forward for all the
3174 outer loops:
3175
3176 | for (k = 1; k < 100; k++)
3177 | for (i = 1; i < 100; i++)
3178 | for (j = 1; j < 100; j++)
3179 | {
3180 | t = T[j+1][i-1]; // A
3181 | T[j][i] = t + 2; // B
3182 | }
3183
3184 the vectors are:
3185 (0, 1, -1)
3186 (1, 1, -1)
3187 (1, -1, 1)
3188 */
3189 if (DDR_NB_LOOPS (ddr) > 1)
3190 {
3191 add_outer_distances (ddr, save_v, index_carry);
3192 add_outer_distances (ddr, dist_v, index_carry);
3193 }
3194 }
3195 else
3196 {
3197 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3198 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3199
3200 if (DDR_NB_LOOPS (ddr) > 1)
3201 {
3202 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3203
3204 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3205 DDR_A (ddr), loop_nest))
3206 return false;
3207 compute_subscript_distance (ddr);
3208 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3209 opposite_v, &init_b,
3210 &index_carry))
3211 return false;
3212
3213 save_dist_v (ddr, save_v);
3214 add_outer_distances (ddr, dist_v, index_carry);
3215 add_outer_distances (ddr, opposite_v, index_carry);
3216 }
3217 else
3218 save_dist_v (ddr, save_v);
3219 }
3220 }
3221 else
3222 {
3223 /* There is a distance of 1 on all the outer loops: Example:
3224 there is a dependence of distance 1 on loop_1 for the array A.
3225
3226 | loop_1
3227 | A[5] = ...
3228 | endloop
3229 */
3230 add_outer_distances (ddr, dist_v,
3231 lambda_vector_first_nz (dist_v,
3232 DDR_NB_LOOPS (ddr), 0));
3233 }
3234
3235 if (dump_file && (dump_flags & TDF_DETAILS))
3236 {
3237 unsigned i;
3238
3239 fprintf (dump_file, "(build_classic_dist_vector\n");
3240 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3241 {
3242 fprintf (dump_file, " dist_vector = (");
3243 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3244 DDR_NB_LOOPS (ddr));
3245 fprintf (dump_file, " )\n");
3246 }
3247 fprintf (dump_file, ")\n");
3248 }
3249
3250 return true;
3251 }
3252
3253 /* Return the direction for a given distance.
3254 FIXME: Computing dir this way is suboptimal, since dir can catch
3255 cases that dist is unable to represent. */
3256
3257 static inline enum data_dependence_direction
3258 dir_from_dist (int dist)
3259 {
3260 if (dist > 0)
3261 return dir_positive;
3262 else if (dist < 0)
3263 return dir_negative;
3264 else
3265 return dir_equal;
3266 }
3267
3268 /* Compute the classic per loop direction vector. DDR is the data
3269 dependence relation to build a vector from. */
3270
3271 static void
3272 build_classic_dir_vector (struct data_dependence_relation *ddr)
3273 {
3274 unsigned i, j;
3275 lambda_vector dist_v;
3276
3277 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3278 {
3279 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3280
3281 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3282 dir_v[j] = dir_from_dist (dist_v[j]);
3283
3284 save_dir_v (ddr, dir_v);
3285 }
3286 }
3287
3288 /* Helper function. Returns true when there is a dependence between
3289 data references DRA and DRB. */
3290
3291 static bool
3292 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3293 struct data_reference *dra,
3294 struct data_reference *drb,
3295 struct loop *loop_nest)
3296 {
3297 unsigned int i;
3298 tree last_conflicts;
3299 struct subscript *subscript;
3300
3301 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3302 i++)
3303 {
3304 conflict_function *overlaps_a, *overlaps_b;
3305
3306 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3307 DR_ACCESS_FN (drb, i),
3308 &overlaps_a, &overlaps_b,
3309 &last_conflicts, loop_nest);
3310
3311 if (CF_NOT_KNOWN_P (overlaps_a)
3312 || CF_NOT_KNOWN_P (overlaps_b))
3313 {
3314 finalize_ddr_dependent (ddr, chrec_dont_know);
3315 dependence_stats.num_dependence_undetermined++;
3316 free_conflict_function (overlaps_a);
3317 free_conflict_function (overlaps_b);
3318 return false;
3319 }
3320
3321 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3322 || CF_NO_DEPENDENCE_P (overlaps_b))
3323 {
3324 finalize_ddr_dependent (ddr, chrec_known);
3325 dependence_stats.num_dependence_independent++;
3326 free_conflict_function (overlaps_a);
3327 free_conflict_function (overlaps_b);
3328 return false;
3329 }
3330
3331 else
3332 {
3333 if (SUB_CONFLICTS_IN_A (subscript))
3334 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3335 if (SUB_CONFLICTS_IN_B (subscript))
3336 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3337
3338 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3339 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3340 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3341 }
3342 }
3343
3344 return true;
3345 }
3346
3347 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3348
3349 static void
3350 subscript_dependence_tester (struct data_dependence_relation *ddr,
3351 struct loop *loop_nest)
3352 {
3353
3354 if (dump_file && (dump_flags & TDF_DETAILS))
3355 fprintf (dump_file, "(subscript_dependence_tester \n");
3356
3357 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3358 dependence_stats.num_dependence_dependent++;
3359
3360 compute_subscript_distance (ddr);
3361 if (build_classic_dist_vector (ddr, loop_nest))
3362 build_classic_dir_vector (ddr);
3363
3364 if (dump_file && (dump_flags & TDF_DETAILS))
3365 fprintf (dump_file, ")\n");
3366 }
3367
3368 /* Returns true when all the access functions of A are affine or
3369 constant with respect to LOOP_NEST. */
3370
3371 static bool
3372 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3373 const struct loop *loop_nest)
3374 {
3375 unsigned int i;
3376 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3377 tree t;
3378
3379 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3380 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3381 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3382 return false;
3383
3384 return true;
3385 }
3386
3387 /* Initializes an equation for an OMEGA problem using the information
3388 contained in the ACCESS_FUN. Returns true when the operation
3389 succeeded.
3390
3391 PB is the omega constraint system.
3392 EQ is the number of the equation to be initialized.
3393 OFFSET is used for shifting the variables names in the constraints:
3394 a constrain is composed of 2 * the number of variables surrounding
3395 dependence accesses. OFFSET is set either to 0 for the first n variables,
3396 then it is set to n.
3397 ACCESS_FUN is expected to be an affine chrec. */
3398
3399 static bool
3400 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3401 unsigned int offset, tree access_fun,
3402 struct data_dependence_relation *ddr)
3403 {
3404 switch (TREE_CODE (access_fun))
3405 {
3406 case POLYNOMIAL_CHREC:
3407 {
3408 tree left = CHREC_LEFT (access_fun);
3409 tree right = CHREC_RIGHT (access_fun);
3410 int var = CHREC_VARIABLE (access_fun);
3411 unsigned var_idx;
3412
3413 if (TREE_CODE (right) != INTEGER_CST)
3414 return false;
3415
3416 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3417 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3418
3419 /* Compute the innermost loop index. */
3420 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3421
3422 if (offset == 0)
3423 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3424 += int_cst_value (right);
3425
3426 switch (TREE_CODE (left))
3427 {
3428 case POLYNOMIAL_CHREC:
3429 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3430
3431 case INTEGER_CST:
3432 pb->eqs[eq].coef[0] += int_cst_value (left);
3433 return true;
3434
3435 default:
3436 return false;
3437 }
3438 }
3439
3440 case INTEGER_CST:
3441 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3442 return true;
3443
3444 default:
3445 return false;
3446 }
3447 }
3448
3449 /* As explained in the comments preceding init_omega_for_ddr, we have
3450 to set up a system for each loop level, setting outer loops
3451 variation to zero, and current loop variation to positive or zero.
3452 Save each lexico positive distance vector. */
3453
3454 static void
3455 omega_extract_distance_vectors (omega_pb pb,
3456 struct data_dependence_relation *ddr)
3457 {
3458 int eq, geq;
3459 unsigned i, j;
3460 struct loop *loopi, *loopj;
3461 enum omega_result res;
3462
3463 /* Set a new problem for each loop in the nest. The basis is the
3464 problem that we have initialized until now. On top of this we
3465 add new constraints. */
3466 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3467 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3468 {
3469 int dist = 0;
3470 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3471 DDR_NB_LOOPS (ddr));
3472
3473 omega_copy_problem (copy, pb);
3474
3475 /* For all the outer loops "loop_j", add "dj = 0". */
3476 for (j = 0;
3477 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3478 {
3479 eq = omega_add_zero_eq (copy, omega_black);
3480 copy->eqs[eq].coef[j + 1] = 1;
3481 }
3482
3483 /* For "loop_i", add "0 <= di". */
3484 geq = omega_add_zero_geq (copy, omega_black);
3485 copy->geqs[geq].coef[i + 1] = 1;
3486
3487 /* Reduce the constraint system, and test that the current
3488 problem is feasible. */
3489 res = omega_simplify_problem (copy);
3490 if (res == omega_false
3491 || res == omega_unknown
3492 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3493 goto next_problem;
3494
3495 for (eq = 0; eq < copy->num_subs; eq++)
3496 if (copy->subs[eq].key == (int) i + 1)
3497 {
3498 dist = copy->subs[eq].coef[0];
3499 goto found_dist;
3500 }
3501
3502 if (dist == 0)
3503 {
3504 /* Reinitialize problem... */
3505 omega_copy_problem (copy, pb);
3506 for (j = 0;
3507 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3508 {
3509 eq = omega_add_zero_eq (copy, omega_black);
3510 copy->eqs[eq].coef[j + 1] = 1;
3511 }
3512
3513 /* ..., but this time "di = 1". */
3514 eq = omega_add_zero_eq (copy, omega_black);
3515 copy->eqs[eq].coef[i + 1] = 1;
3516 copy->eqs[eq].coef[0] = -1;
3517
3518 res = omega_simplify_problem (copy);
3519 if (res == omega_false
3520 || res == omega_unknown
3521 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3522 goto next_problem;
3523
3524 for (eq = 0; eq < copy->num_subs; eq++)
3525 if (copy->subs[eq].key == (int) i + 1)
3526 {
3527 dist = copy->subs[eq].coef[0];
3528 goto found_dist;
3529 }
3530 }
3531
3532 found_dist:;
3533 /* Save the lexicographically positive distance vector. */
3534 if (dist >= 0)
3535 {
3536 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3537 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3538
3539 dist_v[i] = dist;
3540
3541 for (eq = 0; eq < copy->num_subs; eq++)
3542 if (copy->subs[eq].key > 0)
3543 {
3544 dist = copy->subs[eq].coef[0];
3545 dist_v[copy->subs[eq].key - 1] = dist;
3546 }
3547
3548 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3549 dir_v[j] = dir_from_dist (dist_v[j]);
3550
3551 save_dist_v (ddr, dist_v);
3552 save_dir_v (ddr, dir_v);
3553 }
3554
3555 next_problem:;
3556 omega_free_problem (copy);
3557 }
3558 }
3559
3560 /* This is called for each subscript of a tuple of data references:
3561 insert an equality for representing the conflicts. */
3562
3563 static bool
3564 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3565 struct data_dependence_relation *ddr,
3566 omega_pb pb, bool *maybe_dependent)
3567 {
3568 int eq;
3569 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3570 TREE_TYPE (access_fun_b));
3571 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3572 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3573 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3574
3575 /* When the fun_a - fun_b is not constant, the dependence is not
3576 captured by the classic distance vector representation. */
3577 if (TREE_CODE (difference) != INTEGER_CST)
3578 return false;
3579
3580 /* ZIV test. */
3581 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3582 {
3583 /* There is no dependence. */
3584 *maybe_dependent = false;
3585 return true;
3586 }
3587
3588 fun_b = chrec_fold_multiply (type, fun_b, integer_minus_one_node);
3589
3590 eq = omega_add_zero_eq (pb, omega_black);
3591 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3592 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3593 /* There is probably a dependence, but the system of
3594 constraints cannot be built: answer "don't know". */
3595 return false;
3596
3597 /* GCD test. */
3598 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3599 && !int_divides_p (lambda_vector_gcd
3600 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3601 2 * DDR_NB_LOOPS (ddr)),
3602 pb->eqs[eq].coef[0]))
3603 {
3604 /* There is no dependence. */
3605 *maybe_dependent = false;
3606 return true;
3607 }
3608
3609 return true;
3610 }
3611
3612 /* Helper function, same as init_omega_for_ddr but specialized for
3613 data references A and B. */
3614
3615 static bool
3616 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3617 struct data_dependence_relation *ddr,
3618 omega_pb pb, bool *maybe_dependent)
3619 {
3620 unsigned i;
3621 int ineq;
3622 struct loop *loopi;
3623 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3624
3625 /* Insert an equality per subscript. */
3626 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3627 {
3628 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3629 ddr, pb, maybe_dependent))
3630 return false;
3631 else if (*maybe_dependent == false)
3632 {
3633 /* There is no dependence. */
3634 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3635 return true;
3636 }
3637 }
3638
3639 /* Insert inequalities: constraints corresponding to the iteration
3640 domain, i.e. the loops surrounding the references "loop_x" and
3641 the distance variables "dx". The layout of the OMEGA
3642 representation is as follows:
3643 - coef[0] is the constant
3644 - coef[1..nb_loops] are the protected variables that will not be
3645 removed by the solver: the "dx"
3646 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3647 */
3648 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3649 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3650 {
3651 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3652
3653 /* 0 <= loop_x */
3654 ineq = omega_add_zero_geq (pb, omega_black);
3655 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3656
3657 /* 0 <= loop_x + dx */
3658 ineq = omega_add_zero_geq (pb, omega_black);
3659 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3660 pb->geqs[ineq].coef[i + 1] = 1;
3661
3662 if (nbi != -1)
3663 {
3664 /* loop_x <= nb_iters */
3665 ineq = omega_add_zero_geq (pb, omega_black);
3666 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3667 pb->geqs[ineq].coef[0] = nbi;
3668
3669 /* loop_x + dx <= nb_iters */
3670 ineq = omega_add_zero_geq (pb, omega_black);
3671 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3672 pb->geqs[ineq].coef[i + 1] = -1;
3673 pb->geqs[ineq].coef[0] = nbi;
3674
3675 /* A step "dx" bigger than nb_iters is not feasible, so
3676 add "0 <= nb_iters + dx", */
3677 ineq = omega_add_zero_geq (pb, omega_black);
3678 pb->geqs[ineq].coef[i + 1] = 1;
3679 pb->geqs[ineq].coef[0] = nbi;
3680 /* and "dx <= nb_iters". */
3681 ineq = omega_add_zero_geq (pb, omega_black);
3682 pb->geqs[ineq].coef[i + 1] = -1;
3683 pb->geqs[ineq].coef[0] = nbi;
3684 }
3685 }
3686
3687 omega_extract_distance_vectors (pb, ddr);
3688
3689 return true;
3690 }
3691
3692 /* Sets up the Omega dependence problem for the data dependence
3693 relation DDR. Returns false when the constraint system cannot be
3694 built, ie. when the test answers "don't know". Returns true
3695 otherwise, and when independence has been proved (using one of the
3696 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3697 set MAYBE_DEPENDENT to true.
3698
3699 Example: for setting up the dependence system corresponding to the
3700 conflicting accesses
3701
3702 | loop_i
3703 | loop_j
3704 | A[i, i+1] = ...
3705 | ... A[2*j, 2*(i + j)]
3706 | endloop_j
3707 | endloop_i
3708
3709 the following constraints come from the iteration domain:
3710
3711 0 <= i <= Ni
3712 0 <= i + di <= Ni
3713 0 <= j <= Nj
3714 0 <= j + dj <= Nj
3715
3716 where di, dj are the distance variables. The constraints
3717 representing the conflicting elements are:
3718
3719 i = 2 * (j + dj)
3720 i + 1 = 2 * (i + di + j + dj)
3721
3722 For asking that the resulting distance vector (di, dj) be
3723 lexicographically positive, we insert the constraint "di >= 0". If
3724 "di = 0" in the solution, we fix that component to zero, and we
3725 look at the inner loops: we set a new problem where all the outer
3726 loop distances are zero, and fix this inner component to be
3727 positive. When one of the components is positive, we save that
3728 distance, and set a new problem where the distance on this loop is
3729 zero, searching for other distances in the inner loops. Here is
3730 the classic example that illustrates that we have to set for each
3731 inner loop a new problem:
3732
3733 | loop_1
3734 | loop_2
3735 | A[10]
3736 | endloop_2
3737 | endloop_1
3738
3739 we have to save two distances (1, 0) and (0, 1).
3740
3741 Given two array references, refA and refB, we have to set the
3742 dependence problem twice, refA vs. refB and refB vs. refA, and we
3743 cannot do a single test, as refB might occur before refA in the
3744 inner loops, and the contrary when considering outer loops: ex.
3745
3746 | loop_0
3747 | loop_1
3748 | loop_2
3749 | T[{1,+,1}_2][{1,+,1}_1] // refA
3750 | T[{2,+,1}_2][{0,+,1}_1] // refB
3751 | endloop_2
3752 | endloop_1
3753 | endloop_0
3754
3755 refB touches the elements in T before refA, and thus for the same
3756 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3757 but for successive loop_0 iterations, we have (1, -1, 1)
3758
3759 The Omega solver expects the distance variables ("di" in the
3760 previous example) to come first in the constraint system (as
3761 variables to be protected, or "safe" variables), the constraint
3762 system is built using the following layout:
3763
3764 "cst | distance vars | index vars".
3765 */
3766
3767 static bool
3768 init_omega_for_ddr (struct data_dependence_relation *ddr,
3769 bool *maybe_dependent)
3770 {
3771 omega_pb pb;
3772 bool res = false;
3773
3774 *maybe_dependent = true;
3775
3776 if (same_access_functions (ddr))
3777 {
3778 unsigned j;
3779 lambda_vector dir_v;
3780
3781 /* Save the 0 vector. */
3782 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3783 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3784 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3785 dir_v[j] = dir_equal;
3786 save_dir_v (ddr, dir_v);
3787
3788 /* Save the dependences carried by outer loops. */
3789 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3790 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3791 maybe_dependent);
3792 omega_free_problem (pb);
3793 return res;
3794 }
3795
3796 /* Omega expects the protected variables (those that have to be kept
3797 after elimination) to appear first in the constraint system.
3798 These variables are the distance variables. In the following
3799 initialization we declare NB_LOOPS safe variables, and the total
3800 number of variables for the constraint system is 2*NB_LOOPS. */
3801 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3802 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3803 maybe_dependent);
3804 omega_free_problem (pb);
3805
3806 /* Stop computation if not decidable, or no dependence. */
3807 if (res == false || *maybe_dependent == false)
3808 return res;
3809
3810 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3811 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3812 maybe_dependent);
3813 omega_free_problem (pb);
3814
3815 return res;
3816 }
3817
3818 /* Return true when DDR contains the same information as that stored
3819 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3820
3821 static bool
3822 ddr_consistent_p (FILE *file,
3823 struct data_dependence_relation *ddr,
3824 VEC (lambda_vector, heap) *dist_vects,
3825 VEC (lambda_vector, heap) *dir_vects)
3826 {
3827 unsigned int i, j;
3828
3829 /* If dump_file is set, output there. */
3830 if (dump_file && (dump_flags & TDF_DETAILS))
3831 file = dump_file;
3832
3833 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3834 {
3835 lambda_vector b_dist_v;
3836 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3837 VEC_length (lambda_vector, dist_vects),
3838 DDR_NUM_DIST_VECTS (ddr));
3839
3840 fprintf (file, "Banerjee dist vectors:\n");
3841 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3842 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3843
3844 fprintf (file, "Omega dist vectors:\n");
3845 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3846 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3847
3848 fprintf (file, "data dependence relation:\n");
3849 dump_data_dependence_relation (file, ddr);
3850
3851 fprintf (file, ")\n");
3852 return false;
3853 }
3854
3855 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3856 {
3857 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3858 VEC_length (lambda_vector, dir_vects),
3859 DDR_NUM_DIR_VECTS (ddr));
3860 return false;
3861 }
3862
3863 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3864 {
3865 lambda_vector a_dist_v;
3866 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3867
3868 /* Distance vectors are not ordered in the same way in the DDR
3869 and in the DIST_VECTS: search for a matching vector. */
3870 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3871 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3872 break;
3873
3874 if (j == VEC_length (lambda_vector, dist_vects))
3875 {
3876 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3877 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3878 fprintf (file, "not found in Omega dist vectors:\n");
3879 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3880 fprintf (file, "data dependence relation:\n");
3881 dump_data_dependence_relation (file, ddr);
3882 fprintf (file, ")\n");
3883 }
3884 }
3885
3886 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3887 {
3888 lambda_vector a_dir_v;
3889 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3890
3891 /* Direction vectors are not ordered in the same way in the DDR
3892 and in the DIR_VECTS: search for a matching vector. */
3893 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3894 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3895 break;
3896
3897 if (j == VEC_length (lambda_vector, dist_vects))
3898 {
3899 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3900 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3901 fprintf (file, "not found in Omega dir vectors:\n");
3902 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3903 fprintf (file, "data dependence relation:\n");
3904 dump_data_dependence_relation (file, ddr);
3905 fprintf (file, ")\n");
3906 }
3907 }
3908
3909 return true;
3910 }
3911
3912 /* This computes the affine dependence relation between A and B with
3913 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3914 independence between two accesses, while CHREC_DONT_KNOW is used
3915 for representing the unknown relation.
3916
3917 Note that it is possible to stop the computation of the dependence
3918 relation the first time we detect a CHREC_KNOWN element for a given
3919 subscript. */
3920
3921 static void
3922 compute_affine_dependence (struct data_dependence_relation *ddr,
3923 struct loop *loop_nest)
3924 {
3925 struct data_reference *dra = DDR_A (ddr);
3926 struct data_reference *drb = DDR_B (ddr);
3927
3928 if (dump_file && (dump_flags & TDF_DETAILS))
3929 {
3930 fprintf (dump_file, "(compute_affine_dependence\n");
3931 fprintf (dump_file, " (stmt_a = \n");
3932 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3933 fprintf (dump_file, ")\n (stmt_b = \n");
3934 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3935 fprintf (dump_file, ")\n");
3936 }
3937
3938 /* Analyze only when the dependence relation is not yet known. */
3939 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3940 && !DDR_SELF_REFERENCE (ddr))
3941 {
3942 dependence_stats.num_dependence_tests++;
3943
3944 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3945 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3946 {
3947 if (flag_check_data_deps)
3948 {
3949 /* Compute the dependences using the first algorithm. */
3950 subscript_dependence_tester (ddr, loop_nest);
3951
3952 if (dump_file && (dump_flags & TDF_DETAILS))
3953 {
3954 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3955 dump_data_dependence_relation (dump_file, ddr);
3956 }
3957
3958 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3959 {
3960 bool maybe_dependent;
3961 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3962
3963 /* Save the result of the first DD analyzer. */
3964 dist_vects = DDR_DIST_VECTS (ddr);
3965 dir_vects = DDR_DIR_VECTS (ddr);
3966
3967 /* Reset the information. */
3968 DDR_DIST_VECTS (ddr) = NULL;
3969 DDR_DIR_VECTS (ddr) = NULL;
3970
3971 /* Compute the same information using Omega. */
3972 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3973 goto csys_dont_know;
3974
3975 if (dump_file && (dump_flags & TDF_DETAILS))
3976 {
3977 fprintf (dump_file, "Omega Analyzer\n");
3978 dump_data_dependence_relation (dump_file, ddr);
3979 }
3980
3981 /* Check that we get the same information. */
3982 if (maybe_dependent)
3983 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3984 dir_vects));
3985 }
3986 }
3987 else
3988 subscript_dependence_tester (ddr, loop_nest);
3989 }
3990
3991 /* As a last case, if the dependence cannot be determined, or if
3992 the dependence is considered too difficult to determine, answer
3993 "don't know". */
3994 else
3995 {
3996 csys_dont_know:;
3997 dependence_stats.num_dependence_undetermined++;
3998
3999 if (dump_file && (dump_flags & TDF_DETAILS))
4000 {
4001 fprintf (dump_file, "Data ref a:\n");
4002 dump_data_reference (dump_file, dra);
4003 fprintf (dump_file, "Data ref b:\n");
4004 dump_data_reference (dump_file, drb);
4005 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4006 }
4007 finalize_ddr_dependent (ddr, chrec_dont_know);
4008 }
4009 }
4010
4011 if (dump_file && (dump_flags & TDF_DETAILS))
4012 fprintf (dump_file, ")\n");
4013 }
4014
4015 /* This computes the dependence relation for the same data
4016 reference into DDR. */
4017
4018 static void
4019 compute_self_dependence (struct data_dependence_relation *ddr)
4020 {
4021 unsigned int i;
4022 struct subscript *subscript;
4023
4024 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4025 return;
4026
4027 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4028 i++)
4029 {
4030 if (SUB_CONFLICTS_IN_A (subscript))
4031 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4032 if (SUB_CONFLICTS_IN_B (subscript))
4033 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4034
4035 /* The accessed index overlaps for each iteration. */
4036 SUB_CONFLICTS_IN_A (subscript)
4037 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4038 SUB_CONFLICTS_IN_B (subscript)
4039 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4040 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4041 }
4042
4043 /* The distance vector is the zero vector. */
4044 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4045 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4046 }
4047
4048 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4049 the data references in DATAREFS, in the LOOP_NEST. When
4050 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4051 relations. */
4052
4053 void
4054 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4055 VEC (ddr_p, heap) **dependence_relations,
4056 VEC (loop_p, heap) *loop_nest,
4057 bool compute_self_and_rr)
4058 {
4059 struct data_dependence_relation *ddr;
4060 struct data_reference *a, *b;
4061 unsigned int i, j;
4062
4063 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
4064 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4065 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
4066 {
4067 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4068 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4069 if (loop_nest)
4070 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4071 }
4072
4073 if (compute_self_and_rr)
4074 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
4075 {
4076 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4077 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4078 compute_self_dependence (ddr);
4079 }
4080 }
4081
4082 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4083 true if STMT clobbers memory, false otherwise. */
4084
4085 bool
4086 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4087 {
4088 bool clobbers_memory = false;
4089 data_ref_loc *ref;
4090 tree *op0, *op1;
4091 enum gimple_code stmt_code = gimple_code (stmt);
4092
4093 *references = NULL;
4094
4095 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4096 Calls have side-effects, except those to const or pure
4097 functions. */
4098 if ((stmt_code == GIMPLE_CALL
4099 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4100 || (stmt_code == GIMPLE_ASM
4101 && gimple_asm_volatile_p (stmt)))
4102 clobbers_memory = true;
4103
4104 if (!gimple_vuse (stmt))
4105 return clobbers_memory;
4106
4107 if (stmt_code == GIMPLE_ASSIGN)
4108 {
4109 tree base;
4110 op0 = gimple_assign_lhs_ptr (stmt);
4111 op1 = gimple_assign_rhs1_ptr (stmt);
4112
4113 if (DECL_P (*op1)
4114 || (REFERENCE_CLASS_P (*op1)
4115 && (base = get_base_address (*op1))
4116 && TREE_CODE (base) != SSA_NAME))
4117 {
4118 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4119 ref->pos = op1;
4120 ref->is_read = true;
4121 }
4122
4123 if (DECL_P (*op0)
4124 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4125 {
4126 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4127 ref->pos = op0;
4128 ref->is_read = false;
4129 }
4130 }
4131 else if (stmt_code == GIMPLE_CALL)
4132 {
4133 unsigned i, n = gimple_call_num_args (stmt);
4134
4135 for (i = 0; i < n; i++)
4136 {
4137 op0 = gimple_call_arg_ptr (stmt, i);
4138
4139 if (DECL_P (*op0)
4140 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4141 {
4142 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4143 ref->pos = op0;
4144 ref->is_read = true;
4145 }
4146 }
4147 }
4148
4149 return clobbers_memory;
4150 }
4151
4152 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4153 reference, returns false, otherwise returns true. NEST is the outermost
4154 loop of the loop nest in which the references should be analyzed. */
4155
4156 bool
4157 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4158 VEC (data_reference_p, heap) **datarefs)
4159 {
4160 unsigned i;
4161 VEC (data_ref_loc, heap) *references;
4162 data_ref_loc *ref;
4163 bool ret = true;
4164 data_reference_p dr;
4165
4166 if (get_references_in_stmt (stmt, &references))
4167 {
4168 VEC_free (data_ref_loc, heap, references);
4169 return false;
4170 }
4171
4172 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
4173 {
4174 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4175 gcc_assert (dr != NULL);
4176
4177 /* FIXME -- data dependence analysis does not work correctly for objects
4178 with invariant addresses in loop nests. Let us fail here until the
4179 problem is fixed. */
4180 if (dr_address_invariant_p (dr) && nest)
4181 {
4182 free_data_ref (dr);
4183 if (dump_file && (dump_flags & TDF_DETAILS))
4184 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4185 ret = false;
4186 break;
4187 }
4188
4189 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4190 }
4191 VEC_free (data_ref_loc, heap, references);
4192 return ret;
4193 }
4194
4195 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4196 reference, returns false, otherwise returns true. NEST is the outermost
4197 loop of the loop nest in which the references should be analyzed. */
4198
4199 bool
4200 graphite_find_data_references_in_stmt (struct loop *nest, gimple stmt,
4201 VEC (data_reference_p, heap) **datarefs)
4202 {
4203 unsigned i;
4204 VEC (data_ref_loc, heap) *references;
4205 data_ref_loc *ref;
4206 bool ret = true;
4207 data_reference_p dr;
4208
4209 if (get_references_in_stmt (stmt, &references))
4210 {
4211 VEC_free (data_ref_loc, heap, references);
4212 return false;
4213 }
4214
4215 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
4216 {
4217 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4218 gcc_assert (dr != NULL);
4219 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4220 }
4221
4222 VEC_free (data_ref_loc, heap, references);
4223 return ret;
4224 }
4225
4226 /* Search the data references in LOOP, and record the information into
4227 DATAREFS. Returns chrec_dont_know when failing to analyze a
4228 difficult case, returns NULL_TREE otherwise. */
4229
4230 static tree
4231 find_data_references_in_bb (struct loop *loop, basic_block bb,
4232 VEC (data_reference_p, heap) **datarefs)
4233 {
4234 gimple_stmt_iterator bsi;
4235
4236 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4237 {
4238 gimple stmt = gsi_stmt (bsi);
4239
4240 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4241 {
4242 struct data_reference *res;
4243 res = XCNEW (struct data_reference);
4244 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4245
4246 return chrec_dont_know;
4247 }
4248 }
4249
4250 return NULL_TREE;
4251 }
4252
4253 /* Search the data references in LOOP, and record the information into
4254 DATAREFS. Returns chrec_dont_know when failing to analyze a
4255 difficult case, returns NULL_TREE otherwise.
4256
4257 TODO: This function should be made smarter so that it can handle address
4258 arithmetic as if they were array accesses, etc. */
4259
4260 tree
4261 find_data_references_in_loop (struct loop *loop,
4262 VEC (data_reference_p, heap) **datarefs)
4263 {
4264 basic_block bb, *bbs;
4265 unsigned int i;
4266
4267 bbs = get_loop_body_in_dom_order (loop);
4268
4269 for (i = 0; i < loop->num_nodes; i++)
4270 {
4271 bb = bbs[i];
4272
4273 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4274 {
4275 free (bbs);
4276 return chrec_dont_know;
4277 }
4278 }
4279 free (bbs);
4280
4281 return NULL_TREE;
4282 }
4283
4284 /* Recursive helper function. */
4285
4286 static bool
4287 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4288 {
4289 /* Inner loops of the nest should not contain siblings. Example:
4290 when there are two consecutive loops,
4291
4292 | loop_0
4293 | loop_1
4294 | A[{0, +, 1}_1]
4295 | endloop_1
4296 | loop_2
4297 | A[{0, +, 1}_2]
4298 | endloop_2
4299 | endloop_0
4300
4301 the dependence relation cannot be captured by the distance
4302 abstraction. */
4303 if (loop->next)
4304 return false;
4305
4306 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4307 if (loop->inner)
4308 return find_loop_nest_1 (loop->inner, loop_nest);
4309 return true;
4310 }
4311
4312 /* Return false when the LOOP is not well nested. Otherwise return
4313 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4314 contain the loops from the outermost to the innermost, as they will
4315 appear in the classic distance vector. */
4316
4317 bool
4318 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4319 {
4320 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4321 if (loop->inner)
4322 return find_loop_nest_1 (loop->inner, loop_nest);
4323 return true;
4324 }
4325
4326 /* Returns true when the data dependences have been computed, false otherwise.
4327 Given a loop nest LOOP, the following vectors are returned:
4328 DATAREFS is initialized to all the array elements contained in this loop,
4329 DEPENDENCE_RELATIONS contains the relations between the data references.
4330 Compute read-read and self relations if
4331 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4332
4333 bool
4334 compute_data_dependences_for_loop (struct loop *loop,
4335 bool compute_self_and_read_read_dependences,
4336 VEC (data_reference_p, heap) **datarefs,
4337 VEC (ddr_p, heap) **dependence_relations)
4338 {
4339 bool res = true;
4340 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4341
4342 memset (&dependence_stats, 0, sizeof (dependence_stats));
4343
4344 /* If the loop nest is not well formed, or one of the data references
4345 is not computable, give up without spending time to compute other
4346 dependences. */
4347 if (!loop
4348 || !find_loop_nest (loop, &vloops)
4349 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4350 {
4351 struct data_dependence_relation *ddr;
4352
4353 /* Insert a single relation into dependence_relations:
4354 chrec_dont_know. */
4355 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4356 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4357 res = false;
4358 }
4359 else
4360 compute_all_dependences (*datarefs, dependence_relations, vloops,
4361 compute_self_and_read_read_dependences);
4362
4363 if (dump_file && (dump_flags & TDF_STATS))
4364 {
4365 fprintf (dump_file, "Dependence tester statistics:\n");
4366
4367 fprintf (dump_file, "Number of dependence tests: %d\n",
4368 dependence_stats.num_dependence_tests);
4369 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4370 dependence_stats.num_dependence_dependent);
4371 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4372 dependence_stats.num_dependence_independent);
4373 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4374 dependence_stats.num_dependence_undetermined);
4375
4376 fprintf (dump_file, "Number of subscript tests: %d\n",
4377 dependence_stats.num_subscript_tests);
4378 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4379 dependence_stats.num_subscript_undetermined);
4380 fprintf (dump_file, "Number of same subscript function: %d\n",
4381 dependence_stats.num_same_subscript_function);
4382
4383 fprintf (dump_file, "Number of ziv tests: %d\n",
4384 dependence_stats.num_ziv);
4385 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4386 dependence_stats.num_ziv_dependent);
4387 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4388 dependence_stats.num_ziv_independent);
4389 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4390 dependence_stats.num_ziv_unimplemented);
4391
4392 fprintf (dump_file, "Number of siv tests: %d\n",
4393 dependence_stats.num_siv);
4394 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4395 dependence_stats.num_siv_dependent);
4396 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4397 dependence_stats.num_siv_independent);
4398 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4399 dependence_stats.num_siv_unimplemented);
4400
4401 fprintf (dump_file, "Number of miv tests: %d\n",
4402 dependence_stats.num_miv);
4403 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4404 dependence_stats.num_miv_dependent);
4405 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4406 dependence_stats.num_miv_independent);
4407 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4408 dependence_stats.num_miv_unimplemented);
4409 }
4410
4411 return res;
4412 }
4413
4414 /* Returns true when the data dependences for the basic block BB have been
4415 computed, false otherwise.
4416 DATAREFS is initialized to all the array elements contained in this basic
4417 block, DEPENDENCE_RELATIONS contains the relations between the data
4418 references. Compute read-read and self relations if
4419 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4420 bool
4421 compute_data_dependences_for_bb (basic_block bb,
4422 bool compute_self_and_read_read_dependences,
4423 VEC (data_reference_p, heap) **datarefs,
4424 VEC (ddr_p, heap) **dependence_relations)
4425 {
4426 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4427 return false;
4428
4429 compute_all_dependences (*datarefs, dependence_relations, NULL,
4430 compute_self_and_read_read_dependences);
4431 return true;
4432 }
4433
4434 /* Entry point (for testing only). Analyze all the data references
4435 and the dependence relations in LOOP.
4436
4437 The data references are computed first.
4438
4439 A relation on these nodes is represented by a complete graph. Some
4440 of the relations could be of no interest, thus the relations can be
4441 computed on demand.
4442
4443 In the following function we compute all the relations. This is
4444 just a first implementation that is here for:
4445 - for showing how to ask for the dependence relations,
4446 - for the debugging the whole dependence graph,
4447 - for the dejagnu testcases and maintenance.
4448
4449 It is possible to ask only for a part of the graph, avoiding to
4450 compute the whole dependence graph. The computed dependences are
4451 stored in a knowledge base (KB) such that later queries don't
4452 recompute the same information. The implementation of this KB is
4453 transparent to the optimizer, and thus the KB can be changed with a
4454 more efficient implementation, or the KB could be disabled. */
4455 static void
4456 analyze_all_data_dependences (struct loop *loop)
4457 {
4458 unsigned int i;
4459 int nb_data_refs = 10;
4460 VEC (data_reference_p, heap) *datarefs =
4461 VEC_alloc (data_reference_p, heap, nb_data_refs);
4462 VEC (ddr_p, heap) *dependence_relations =
4463 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4464
4465 /* Compute DDs on the whole function. */
4466 compute_data_dependences_for_loop (loop, false, &datarefs,
4467 &dependence_relations);
4468
4469 if (dump_file)
4470 {
4471 dump_data_dependence_relations (dump_file, dependence_relations);
4472 fprintf (dump_file, "\n\n");
4473
4474 if (dump_flags & TDF_DETAILS)
4475 dump_dist_dir_vectors (dump_file, dependence_relations);
4476
4477 if (dump_flags & TDF_STATS)
4478 {
4479 unsigned nb_top_relations = 0;
4480 unsigned nb_bot_relations = 0;
4481 unsigned nb_chrec_relations = 0;
4482 struct data_dependence_relation *ddr;
4483
4484 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4485 {
4486 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4487 nb_top_relations++;
4488
4489 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4490 nb_bot_relations++;
4491
4492 else
4493 nb_chrec_relations++;
4494 }
4495
4496 gather_stats_on_scev_database ();
4497 }
4498 }
4499
4500 free_dependence_relations (dependence_relations);
4501 free_data_refs (datarefs);
4502 }
4503
4504 /* Computes all the data dependences and check that the results of
4505 several analyzers are the same. */
4506
4507 void
4508 tree_check_data_deps (void)
4509 {
4510 loop_iterator li;
4511 struct loop *loop_nest;
4512
4513 FOR_EACH_LOOP (li, loop_nest, 0)
4514 analyze_all_data_dependences (loop_nest);
4515 }
4516
4517 /* Free the memory used by a data dependence relation DDR. */
4518
4519 void
4520 free_dependence_relation (struct data_dependence_relation *ddr)
4521 {
4522 if (ddr == NULL)
4523 return;
4524
4525 if (DDR_SUBSCRIPTS (ddr))
4526 free_subscripts (DDR_SUBSCRIPTS (ddr));
4527 if (DDR_DIST_VECTS (ddr))
4528 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4529 if (DDR_DIR_VECTS (ddr))
4530 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4531
4532 free (ddr);
4533 }
4534
4535 /* Free the memory used by the data dependence relations from
4536 DEPENDENCE_RELATIONS. */
4537
4538 void
4539 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4540 {
4541 unsigned int i;
4542 struct data_dependence_relation *ddr;
4543 VEC (loop_p, heap) *loop_nest = NULL;
4544
4545 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4546 {
4547 if (ddr == NULL)
4548 continue;
4549 if (loop_nest == NULL)
4550 loop_nest = DDR_LOOP_NEST (ddr);
4551 else
4552 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4553 || DDR_LOOP_NEST (ddr) == loop_nest);
4554 free_dependence_relation (ddr);
4555 }
4556
4557 if (loop_nest)
4558 VEC_free (loop_p, heap, loop_nest);
4559 VEC_free (ddr_p, heap, dependence_relations);
4560 }
4561
4562 /* Free the memory used by the data references from DATAREFS. */
4563
4564 void
4565 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4566 {
4567 unsigned int i;
4568 struct data_reference *dr;
4569
4570 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4571 free_data_ref (dr);
4572 VEC_free (data_reference_p, heap, datarefs);
4573 }
4574
4575 \f
4576
4577 /* Dump vertex I in RDG to FILE. */
4578
4579 void
4580 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4581 {
4582 struct vertex *v = &(rdg->vertices[i]);
4583 struct graph_edge *e;
4584
4585 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4586 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4587 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4588
4589 if (v->pred)
4590 for (e = v->pred; e; e = e->pred_next)
4591 fprintf (file, " %d", e->src);
4592
4593 fprintf (file, ") (out:");
4594
4595 if (v->succ)
4596 for (e = v->succ; e; e = e->succ_next)
4597 fprintf (file, " %d", e->dest);
4598
4599 fprintf (file, ") \n");
4600 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4601 fprintf (file, ")\n");
4602 }
4603
4604 /* Call dump_rdg_vertex on stderr. */
4605
4606 DEBUG_FUNCTION void
4607 debug_rdg_vertex (struct graph *rdg, int i)
4608 {
4609 dump_rdg_vertex (stderr, rdg, i);
4610 }
4611
4612 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4613 dumped vertices to that bitmap. */
4614
4615 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4616 {
4617 int i;
4618
4619 fprintf (file, "(%d\n", c);
4620
4621 for (i = 0; i < rdg->n_vertices; i++)
4622 if (rdg->vertices[i].component == c)
4623 {
4624 if (dumped)
4625 bitmap_set_bit (dumped, i);
4626
4627 dump_rdg_vertex (file, rdg, i);
4628 }
4629
4630 fprintf (file, ")\n");
4631 }
4632
4633 /* Call dump_rdg_vertex on stderr. */
4634
4635 DEBUG_FUNCTION void
4636 debug_rdg_component (struct graph *rdg, int c)
4637 {
4638 dump_rdg_component (stderr, rdg, c, NULL);
4639 }
4640
4641 /* Dump the reduced dependence graph RDG to FILE. */
4642
4643 void
4644 dump_rdg (FILE *file, struct graph *rdg)
4645 {
4646 int i;
4647 bitmap dumped = BITMAP_ALLOC (NULL);
4648
4649 fprintf (file, "(rdg\n");
4650
4651 for (i = 0; i < rdg->n_vertices; i++)
4652 if (!bitmap_bit_p (dumped, i))
4653 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4654
4655 fprintf (file, ")\n");
4656 BITMAP_FREE (dumped);
4657 }
4658
4659 /* Call dump_rdg on stderr. */
4660
4661 DEBUG_FUNCTION void
4662 debug_rdg (struct graph *rdg)
4663 {
4664 dump_rdg (stderr, rdg);
4665 }
4666
4667 /* This structure is used for recording the mapping statement index in
4668 the RDG. */
4669
4670 struct GTY(()) rdg_vertex_info
4671 {
4672 gimple stmt;
4673 int index;
4674 };
4675
4676 /* Returns the index of STMT in RDG. */
4677
4678 int
4679 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4680 {
4681 struct rdg_vertex_info rvi, *slot;
4682
4683 rvi.stmt = stmt;
4684 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4685
4686 if (!slot)
4687 return -1;
4688
4689 return slot->index;
4690 }
4691
4692 /* Creates an edge in RDG for each distance vector from DDR. The
4693 order that we keep track of in the RDG is the order in which
4694 statements have to be executed. */
4695
4696 static void
4697 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4698 {
4699 struct graph_edge *e;
4700 int va, vb;
4701 data_reference_p dra = DDR_A (ddr);
4702 data_reference_p drb = DDR_B (ddr);
4703 unsigned level = ddr_dependence_level (ddr);
4704
4705 /* For non scalar dependences, when the dependence is REVERSED,
4706 statement B has to be executed before statement A. */
4707 if (level > 0
4708 && !DDR_REVERSED_P (ddr))
4709 {
4710 data_reference_p tmp = dra;
4711 dra = drb;
4712 drb = tmp;
4713 }
4714
4715 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4716 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4717
4718 if (va < 0 || vb < 0)
4719 return;
4720
4721 e = add_edge (rdg, va, vb);
4722 e->data = XNEW (struct rdg_edge);
4723
4724 RDGE_LEVEL (e) = level;
4725 RDGE_RELATION (e) = ddr;
4726
4727 /* Determines the type of the data dependence. */
4728 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4729 RDGE_TYPE (e) = input_dd;
4730 else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4731 RDGE_TYPE (e) = output_dd;
4732 else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4733 RDGE_TYPE (e) = flow_dd;
4734 else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4735 RDGE_TYPE (e) = anti_dd;
4736 }
4737
4738 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4739 the index of DEF in RDG. */
4740
4741 static void
4742 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4743 {
4744 use_operand_p imm_use_p;
4745 imm_use_iterator iterator;
4746
4747 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4748 {
4749 struct graph_edge *e;
4750 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4751
4752 if (use < 0)
4753 continue;
4754
4755 e = add_edge (rdg, idef, use);
4756 e->data = XNEW (struct rdg_edge);
4757 RDGE_TYPE (e) = flow_dd;
4758 RDGE_RELATION (e) = NULL;
4759 }
4760 }
4761
4762 /* Creates the edges of the reduced dependence graph RDG. */
4763
4764 static void
4765 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4766 {
4767 int i;
4768 struct data_dependence_relation *ddr;
4769 def_operand_p def_p;
4770 ssa_op_iter iter;
4771
4772 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4773 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4774 create_rdg_edge_for_ddr (rdg, ddr);
4775
4776 for (i = 0; i < rdg->n_vertices; i++)
4777 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4778 iter, SSA_OP_DEF)
4779 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4780 }
4781
4782 /* Build the vertices of the reduced dependence graph RDG. */
4783
4784 void
4785 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4786 {
4787 int i, j;
4788 gimple stmt;
4789
4790 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
4791 {
4792 VEC (data_ref_loc, heap) *references;
4793 data_ref_loc *ref;
4794 struct vertex *v = &(rdg->vertices[i]);
4795 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4796 struct rdg_vertex_info **slot;
4797
4798 rvi->stmt = stmt;
4799 rvi->index = i;
4800 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4801
4802 if (!*slot)
4803 *slot = rvi;
4804 else
4805 free (rvi);
4806
4807 v->data = XNEW (struct rdg_vertex);
4808 RDG_STMT (rdg, i) = stmt;
4809
4810 RDG_MEM_WRITE_STMT (rdg, i) = false;
4811 RDG_MEM_READS_STMT (rdg, i) = false;
4812 if (gimple_code (stmt) == GIMPLE_PHI)
4813 continue;
4814
4815 get_references_in_stmt (stmt, &references);
4816 for (j = 0; VEC_iterate (data_ref_loc, references, j, ref); j++)
4817 if (!ref->is_read)
4818 RDG_MEM_WRITE_STMT (rdg, i) = true;
4819 else
4820 RDG_MEM_READS_STMT (rdg, i) = true;
4821
4822 VEC_free (data_ref_loc, heap, references);
4823 }
4824 }
4825
4826 /* Initialize STMTS with all the statements of LOOP. When
4827 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4828 which we discover statements is important as
4829 generate_loops_for_partition is using the same traversal for
4830 identifying statements. */
4831
4832 static void
4833 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4834 {
4835 unsigned int i;
4836 basic_block *bbs = get_loop_body_in_dom_order (loop);
4837
4838 for (i = 0; i < loop->num_nodes; i++)
4839 {
4840 basic_block bb = bbs[i];
4841 gimple_stmt_iterator bsi;
4842 gimple stmt;
4843
4844 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4845 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4846
4847 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4848 {
4849 stmt = gsi_stmt (bsi);
4850 if (gimple_code (stmt) != GIMPLE_LABEL)
4851 VEC_safe_push (gimple, heap, *stmts, stmt);
4852 }
4853 }
4854
4855 free (bbs);
4856 }
4857
4858 /* Returns true when all the dependences are computable. */
4859
4860 static bool
4861 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4862 {
4863 ddr_p ddr;
4864 unsigned int i;
4865
4866 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4867 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4868 return false;
4869
4870 return true;
4871 }
4872
4873 /* Computes a hash function for element ELT. */
4874
4875 static hashval_t
4876 hash_stmt_vertex_info (const void *elt)
4877 {
4878 const struct rdg_vertex_info *const rvi =
4879 (const struct rdg_vertex_info *) elt;
4880 gimple stmt = rvi->stmt;
4881
4882 return htab_hash_pointer (stmt);
4883 }
4884
4885 /* Compares database elements E1 and E2. */
4886
4887 static int
4888 eq_stmt_vertex_info (const void *e1, const void *e2)
4889 {
4890 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4891 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4892
4893 return elt1->stmt == elt2->stmt;
4894 }
4895
4896 /* Free the element E. */
4897
4898 static void
4899 hash_stmt_vertex_del (void *e)
4900 {
4901 free (e);
4902 }
4903
4904 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4905 statement of the loop nest, and one edge per data dependence or
4906 scalar dependence. */
4907
4908 struct graph *
4909 build_empty_rdg (int n_stmts)
4910 {
4911 int nb_data_refs = 10;
4912 struct graph *rdg = new_graph (n_stmts);
4913
4914 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4915 eq_stmt_vertex_info, hash_stmt_vertex_del);
4916 return rdg;
4917 }
4918
4919 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4920 statement of the loop nest, and one edge per data dependence or
4921 scalar dependence. */
4922
4923 struct graph *
4924 build_rdg (struct loop *loop)
4925 {
4926 int nb_data_refs = 10;
4927 struct graph *rdg = NULL;
4928 VEC (ddr_p, heap) *dependence_relations;
4929 VEC (data_reference_p, heap) *datarefs;
4930 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, nb_data_refs);
4931
4932 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4933 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4934 compute_data_dependences_for_loop (loop,
4935 false,
4936 &datarefs,
4937 &dependence_relations);
4938
4939 if (!known_dependences_p (dependence_relations))
4940 {
4941 free_dependence_relations (dependence_relations);
4942 free_data_refs (datarefs);
4943 VEC_free (gimple, heap, stmts);
4944
4945 return rdg;
4946 }
4947
4948 stmts_from_loop (loop, &stmts);
4949 rdg = build_empty_rdg (VEC_length (gimple, stmts));
4950
4951 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4952 eq_stmt_vertex_info, hash_stmt_vertex_del);
4953 create_rdg_vertices (rdg, stmts);
4954 create_rdg_edges (rdg, dependence_relations);
4955
4956 VEC_free (gimple, heap, stmts);
4957 return rdg;
4958 }
4959
4960 /* Free the reduced dependence graph RDG. */
4961
4962 void
4963 free_rdg (struct graph *rdg)
4964 {
4965 int i;
4966
4967 for (i = 0; i < rdg->n_vertices; i++)
4968 free (rdg->vertices[i].data);
4969
4970 htab_delete (rdg->indices);
4971 free_graph (rdg);
4972 }
4973
4974 /* Initialize STMTS with all the statements of LOOP that contain a
4975 store to memory. */
4976
4977 void
4978 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4979 {
4980 unsigned int i;
4981 basic_block *bbs = get_loop_body_in_dom_order (loop);
4982
4983 for (i = 0; i < loop->num_nodes; i++)
4984 {
4985 basic_block bb = bbs[i];
4986 gimple_stmt_iterator bsi;
4987
4988 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4989 if (gimple_vdef (gsi_stmt (bsi)))
4990 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4991 }
4992
4993 free (bbs);
4994 }
4995
4996 /* For a data reference REF, return the declaration of its base
4997 address or NULL_TREE if the base is not determined. */
4998
4999 static inline tree
5000 ref_base_address (gimple stmt, data_ref_loc *ref)
5001 {
5002 tree base = NULL_TREE;
5003 tree base_address;
5004 struct data_reference *dr = XCNEW (struct data_reference);
5005
5006 DR_STMT (dr) = stmt;
5007 DR_REF (dr) = *ref->pos;
5008 dr_analyze_innermost (dr);
5009 base_address = DR_BASE_ADDRESS (dr);
5010
5011 if (!base_address)
5012 goto end;
5013
5014 switch (TREE_CODE (base_address))
5015 {
5016 case ADDR_EXPR:
5017 base = TREE_OPERAND (base_address, 0);
5018 break;
5019
5020 default:
5021 base = base_address;
5022 break;
5023 }
5024
5025 end:
5026 free_data_ref (dr);
5027 return base;
5028 }
5029
5030 /* Determines whether the statement from vertex V of the RDG has a
5031 definition used outside the loop that contains this statement. */
5032
5033 bool
5034 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5035 {
5036 gimple stmt = RDG_STMT (rdg, v);
5037 struct loop *loop = loop_containing_stmt (stmt);
5038 use_operand_p imm_use_p;
5039 imm_use_iterator iterator;
5040 ssa_op_iter it;
5041 def_operand_p def_p;
5042
5043 if (!loop)
5044 return true;
5045
5046 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5047 {
5048 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5049 {
5050 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5051 return true;
5052 }
5053 }
5054
5055 return false;
5056 }
5057
5058 /* Determines whether statements S1 and S2 access to similar memory
5059 locations. Two memory accesses are considered similar when they
5060 have the same base address declaration, i.e. when their
5061 ref_base_address is the same. */
5062
5063 bool
5064 have_similar_memory_accesses (gimple s1, gimple s2)
5065 {
5066 bool res = false;
5067 unsigned i, j;
5068 VEC (data_ref_loc, heap) *refs1, *refs2;
5069 data_ref_loc *ref1, *ref2;
5070
5071 get_references_in_stmt (s1, &refs1);
5072 get_references_in_stmt (s2, &refs2);
5073
5074 for (i = 0; VEC_iterate (data_ref_loc, refs1, i, ref1); i++)
5075 {
5076 tree base1 = ref_base_address (s1, ref1);
5077
5078 if (base1)
5079 for (j = 0; VEC_iterate (data_ref_loc, refs2, j, ref2); j++)
5080 if (base1 == ref_base_address (s2, ref2))
5081 {
5082 res = true;
5083 goto end;
5084 }
5085 }
5086
5087 end:
5088 VEC_free (data_ref_loc, heap, refs1);
5089 VEC_free (data_ref_loc, heap, refs2);
5090 return res;
5091 }
5092
5093 /* Helper function for the hashtab. */
5094
5095 static int
5096 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5097 {
5098 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5099 CONST_CAST_GIMPLE ((const_gimple) s2));
5100 }
5101
5102 /* Helper function for the hashtab. */
5103
5104 static hashval_t
5105 ref_base_address_1 (const void *s)
5106 {
5107 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5108 unsigned i;
5109 VEC (data_ref_loc, heap) *refs;
5110 data_ref_loc *ref;
5111 hashval_t res = 0;
5112
5113 get_references_in_stmt (stmt, &refs);
5114
5115 for (i = 0; VEC_iterate (data_ref_loc, refs, i, ref); i++)
5116 if (!ref->is_read)
5117 {
5118 res = htab_hash_pointer (ref_base_address (stmt, ref));
5119 break;
5120 }
5121
5122 VEC_free (data_ref_loc, heap, refs);
5123 return res;
5124 }
5125
5126 /* Try to remove duplicated write data references from STMTS. */
5127
5128 void
5129 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5130 {
5131 unsigned i;
5132 gimple stmt;
5133 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5134 have_similar_memory_accesses_1, NULL);
5135
5136 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5137 {
5138 void **slot;
5139
5140 slot = htab_find_slot (seen, stmt, INSERT);
5141
5142 if (*slot)
5143 VEC_ordered_remove (gimple, *stmts, i);
5144 else
5145 {
5146 *slot = (void *) stmt;
5147 i++;
5148 }
5149 }
5150
5151 htab_delete (seen);
5152 }
5153
5154 /* Returns the index of PARAMETER in the parameters vector of the
5155 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5156
5157 int
5158 access_matrix_get_index_for_parameter (tree parameter,
5159 struct access_matrix *access_matrix)
5160 {
5161 int i;
5162 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5163 tree lambda_parameter;
5164
5165 for (i = 0; VEC_iterate (tree, lambda_parameters, i, lambda_parameter); i++)
5166 if (lambda_parameter == parameter)
5167 return i + AM_NB_INDUCTION_VARS (access_matrix);
5168
5169 return -1;
5170 }