c-common.c, [...]: Replace tree_low_cst (..., 0) with tree_to_shwi throughout.
[gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
24
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
30
31 The goals of this analysis are:
32
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
36
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
39
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
45
46 - to define a knowledge base for storing the data dependence
47 information,
48
49 - to define an interface to access this data.
50
51
52 Definitions:
53
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
58
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
63
64 References:
65
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
69
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
72
73
74 */
75
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "tree.h"
80 #include "gimple-pretty-print.h"
81 #include "gimple.h"
82 #include "gimple-iterator.h"
83 #include "tree-ssa-loop-niter.h"
84 #include "tree-ssa-loop.h"
85 #include "tree-ssa.h"
86 #include "cfgloop.h"
87 #include "tree-data-ref.h"
88 #include "tree-scalar-evolution.h"
89 #include "dumpfile.h"
90 #include "langhooks.h"
91 #include "tree-affine.h"
92 #include "params.h"
93
94 static struct datadep_stats
95 {
96 int num_dependence_tests;
97 int num_dependence_dependent;
98 int num_dependence_independent;
99 int num_dependence_undetermined;
100
101 int num_subscript_tests;
102 int num_subscript_undetermined;
103 int num_same_subscript_function;
104
105 int num_ziv;
106 int num_ziv_independent;
107 int num_ziv_dependent;
108 int num_ziv_unimplemented;
109
110 int num_siv;
111 int num_siv_independent;
112 int num_siv_dependent;
113 int num_siv_unimplemented;
114
115 int num_miv;
116 int num_miv_independent;
117 int num_miv_dependent;
118 int num_miv_unimplemented;
119 } dependence_stats;
120
121 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
122 struct data_reference *,
123 struct data_reference *,
124 struct loop *);
125 /* Returns true iff A divides B. */
126
127 static inline bool
128 tree_fold_divides_p (const_tree a, const_tree b)
129 {
130 gcc_assert (TREE_CODE (a) == INTEGER_CST);
131 gcc_assert (TREE_CODE (b) == INTEGER_CST);
132 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
133 }
134
135 /* Returns true iff A divides B. */
136
137 static inline bool
138 int_divides_p (int a, int b)
139 {
140 return ((b % a) == 0);
141 }
142
143 \f
144
145 /* Dump into FILE all the data references from DATAREFS. */
146
147 static void
148 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
149 {
150 unsigned int i;
151 struct data_reference *dr;
152
153 FOR_EACH_VEC_ELT (datarefs, i, dr)
154 dump_data_reference (file, dr);
155 }
156
157 /* Unified dump into FILE all the data references from DATAREFS. */
158
159 DEBUG_FUNCTION void
160 debug (vec<data_reference_p> &ref)
161 {
162 dump_data_references (stderr, ref);
163 }
164
165 DEBUG_FUNCTION void
166 debug (vec<data_reference_p> *ptr)
167 {
168 if (ptr)
169 debug (*ptr);
170 else
171 fprintf (stderr, "<nil>\n");
172 }
173
174
175 /* Dump into STDERR all the data references from DATAREFS. */
176
177 DEBUG_FUNCTION void
178 debug_data_references (vec<data_reference_p> datarefs)
179 {
180 dump_data_references (stderr, datarefs);
181 }
182
183 /* Print to STDERR the data_reference DR. */
184
185 DEBUG_FUNCTION void
186 debug_data_reference (struct data_reference *dr)
187 {
188 dump_data_reference (stderr, dr);
189 }
190
191 /* Dump function for a DATA_REFERENCE structure. */
192
193 void
194 dump_data_reference (FILE *outf,
195 struct data_reference *dr)
196 {
197 unsigned int i;
198
199 fprintf (outf, "#(Data Ref: \n");
200 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
201 fprintf (outf, "# stmt: ");
202 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
203 fprintf (outf, "# ref: ");
204 print_generic_stmt (outf, DR_REF (dr), 0);
205 fprintf (outf, "# base_object: ");
206 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
207
208 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
209 {
210 fprintf (outf, "# Access function %d: ", i);
211 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
212 }
213 fprintf (outf, "#)\n");
214 }
215
216 /* Unified dump function for a DATA_REFERENCE structure. */
217
218 DEBUG_FUNCTION void
219 debug (data_reference &ref)
220 {
221 dump_data_reference (stderr, &ref);
222 }
223
224 DEBUG_FUNCTION void
225 debug (data_reference *ptr)
226 {
227 if (ptr)
228 debug (*ptr);
229 else
230 fprintf (stderr, "<nil>\n");
231 }
232
233
234 /* Dumps the affine function described by FN to the file OUTF. */
235
236 static void
237 dump_affine_function (FILE *outf, affine_fn fn)
238 {
239 unsigned i;
240 tree coef;
241
242 print_generic_expr (outf, fn[0], TDF_SLIM);
243 for (i = 1; fn.iterate (i, &coef); i++)
244 {
245 fprintf (outf, " + ");
246 print_generic_expr (outf, coef, TDF_SLIM);
247 fprintf (outf, " * x_%u", i);
248 }
249 }
250
251 /* Dumps the conflict function CF to the file OUTF. */
252
253 static void
254 dump_conflict_function (FILE *outf, conflict_function *cf)
255 {
256 unsigned i;
257
258 if (cf->n == NO_DEPENDENCE)
259 fprintf (outf, "no dependence");
260 else if (cf->n == NOT_KNOWN)
261 fprintf (outf, "not known");
262 else
263 {
264 for (i = 0; i < cf->n; i++)
265 {
266 if (i != 0)
267 fprintf (outf, " ");
268 fprintf (outf, "[");
269 dump_affine_function (outf, cf->fns[i]);
270 fprintf (outf, "]");
271 }
272 }
273 }
274
275 /* Dump function for a SUBSCRIPT structure. */
276
277 static void
278 dump_subscript (FILE *outf, struct subscript *subscript)
279 {
280 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
281
282 fprintf (outf, "\n (subscript \n");
283 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
284 dump_conflict_function (outf, cf);
285 if (CF_NONTRIVIAL_P (cf))
286 {
287 tree last_iteration = SUB_LAST_CONFLICT (subscript);
288 fprintf (outf, "\n last_conflict: ");
289 print_generic_expr (outf, last_iteration, 0);
290 }
291
292 cf = SUB_CONFLICTS_IN_B (subscript);
293 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
294 dump_conflict_function (outf, cf);
295 if (CF_NONTRIVIAL_P (cf))
296 {
297 tree last_iteration = SUB_LAST_CONFLICT (subscript);
298 fprintf (outf, "\n last_conflict: ");
299 print_generic_expr (outf, last_iteration, 0);
300 }
301
302 fprintf (outf, "\n (Subscript distance: ");
303 print_generic_expr (outf, SUB_DISTANCE (subscript), 0);
304 fprintf (outf, " ))\n");
305 }
306
307 /* Print the classic direction vector DIRV to OUTF. */
308
309 static void
310 print_direction_vector (FILE *outf,
311 lambda_vector dirv,
312 int length)
313 {
314 int eq;
315
316 for (eq = 0; eq < length; eq++)
317 {
318 enum data_dependence_direction dir = ((enum data_dependence_direction)
319 dirv[eq]);
320
321 switch (dir)
322 {
323 case dir_positive:
324 fprintf (outf, " +");
325 break;
326 case dir_negative:
327 fprintf (outf, " -");
328 break;
329 case dir_equal:
330 fprintf (outf, " =");
331 break;
332 case dir_positive_or_equal:
333 fprintf (outf, " +=");
334 break;
335 case dir_positive_or_negative:
336 fprintf (outf, " +-");
337 break;
338 case dir_negative_or_equal:
339 fprintf (outf, " -=");
340 break;
341 case dir_star:
342 fprintf (outf, " *");
343 break;
344 default:
345 fprintf (outf, "indep");
346 break;
347 }
348 }
349 fprintf (outf, "\n");
350 }
351
352 /* Print a vector of direction vectors. */
353
354 static void
355 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
356 int length)
357 {
358 unsigned j;
359 lambda_vector v;
360
361 FOR_EACH_VEC_ELT (dir_vects, j, v)
362 print_direction_vector (outf, v, length);
363 }
364
365 /* Print out a vector VEC of length N to OUTFILE. */
366
367 static inline void
368 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
369 {
370 int i;
371
372 for (i = 0; i < n; i++)
373 fprintf (outfile, "%3d ", vector[i]);
374 fprintf (outfile, "\n");
375 }
376
377 /* Print a vector of distance vectors. */
378
379 static void
380 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
381 int length)
382 {
383 unsigned j;
384 lambda_vector v;
385
386 FOR_EACH_VEC_ELT (dist_vects, j, v)
387 print_lambda_vector (outf, v, length);
388 }
389
390 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
391
392 static void
393 dump_data_dependence_relation (FILE *outf,
394 struct data_dependence_relation *ddr)
395 {
396 struct data_reference *dra, *drb;
397
398 fprintf (outf, "(Data Dep: \n");
399
400 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
401 {
402 if (ddr)
403 {
404 dra = DDR_A (ddr);
405 drb = DDR_B (ddr);
406 if (dra)
407 dump_data_reference (outf, dra);
408 else
409 fprintf (outf, " (nil)\n");
410 if (drb)
411 dump_data_reference (outf, drb);
412 else
413 fprintf (outf, " (nil)\n");
414 }
415 fprintf (outf, " (don't know)\n)\n");
416 return;
417 }
418
419 dra = DDR_A (ddr);
420 drb = DDR_B (ddr);
421 dump_data_reference (outf, dra);
422 dump_data_reference (outf, drb);
423
424 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
425 fprintf (outf, " (no dependence)\n");
426
427 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
428 {
429 unsigned int i;
430 struct loop *loopi;
431
432 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
433 {
434 fprintf (outf, " access_fn_A: ");
435 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
436 fprintf (outf, " access_fn_B: ");
437 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
438 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
439 }
440
441 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
442 fprintf (outf, " loop nest: (");
443 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
444 fprintf (outf, "%d ", loopi->num);
445 fprintf (outf, ")\n");
446
447 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
448 {
449 fprintf (outf, " distance_vector: ");
450 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
451 DDR_NB_LOOPS (ddr));
452 }
453
454 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
455 {
456 fprintf (outf, " direction_vector: ");
457 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
458 DDR_NB_LOOPS (ddr));
459 }
460 }
461
462 fprintf (outf, ")\n");
463 }
464
465 /* Debug version. */
466
467 DEBUG_FUNCTION void
468 debug_data_dependence_relation (struct data_dependence_relation *ddr)
469 {
470 dump_data_dependence_relation (stderr, ddr);
471 }
472
473 /* Dump into FILE all the dependence relations from DDRS. */
474
475 void
476 dump_data_dependence_relations (FILE *file,
477 vec<ddr_p> ddrs)
478 {
479 unsigned int i;
480 struct data_dependence_relation *ddr;
481
482 FOR_EACH_VEC_ELT (ddrs, i, ddr)
483 dump_data_dependence_relation (file, ddr);
484 }
485
486 DEBUG_FUNCTION void
487 debug (vec<ddr_p> &ref)
488 {
489 dump_data_dependence_relations (stderr, ref);
490 }
491
492 DEBUG_FUNCTION void
493 debug (vec<ddr_p> *ptr)
494 {
495 if (ptr)
496 debug (*ptr);
497 else
498 fprintf (stderr, "<nil>\n");
499 }
500
501
502 /* Dump to STDERR all the dependence relations from DDRS. */
503
504 DEBUG_FUNCTION void
505 debug_data_dependence_relations (vec<ddr_p> ddrs)
506 {
507 dump_data_dependence_relations (stderr, ddrs);
508 }
509
510 /* Dumps the distance and direction vectors in FILE. DDRS contains
511 the dependence relations, and VECT_SIZE is the size of the
512 dependence vectors, or in other words the number of loops in the
513 considered nest. */
514
515 static void
516 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
517 {
518 unsigned int i, j;
519 struct data_dependence_relation *ddr;
520 lambda_vector v;
521
522 FOR_EACH_VEC_ELT (ddrs, i, ddr)
523 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
524 {
525 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
526 {
527 fprintf (file, "DISTANCE_V (");
528 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
529 fprintf (file, ")\n");
530 }
531
532 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
533 {
534 fprintf (file, "DIRECTION_V (");
535 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
536 fprintf (file, ")\n");
537 }
538 }
539
540 fprintf (file, "\n\n");
541 }
542
543 /* Dumps the data dependence relations DDRS in FILE. */
544
545 static void
546 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
547 {
548 unsigned int i;
549 struct data_dependence_relation *ddr;
550
551 FOR_EACH_VEC_ELT (ddrs, i, ddr)
552 dump_data_dependence_relation (file, ddr);
553
554 fprintf (file, "\n\n");
555 }
556
557 DEBUG_FUNCTION void
558 debug_ddrs (vec<ddr_p> ddrs)
559 {
560 dump_ddrs (stderr, ddrs);
561 }
562
563 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
564 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
565 constant of type ssizetype, and returns true. If we cannot do this
566 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
567 is returned. */
568
569 static bool
570 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
571 tree *var, tree *off)
572 {
573 tree var0, var1;
574 tree off0, off1;
575 enum tree_code ocode = code;
576
577 *var = NULL_TREE;
578 *off = NULL_TREE;
579
580 switch (code)
581 {
582 case INTEGER_CST:
583 *var = build_int_cst (type, 0);
584 *off = fold_convert (ssizetype, op0);
585 return true;
586
587 case POINTER_PLUS_EXPR:
588 ocode = PLUS_EXPR;
589 /* FALLTHROUGH */
590 case PLUS_EXPR:
591 case MINUS_EXPR:
592 split_constant_offset (op0, &var0, &off0);
593 split_constant_offset (op1, &var1, &off1);
594 *var = fold_build2 (code, type, var0, var1);
595 *off = size_binop (ocode, off0, off1);
596 return true;
597
598 case MULT_EXPR:
599 if (TREE_CODE (op1) != INTEGER_CST)
600 return false;
601
602 split_constant_offset (op0, &var0, &off0);
603 *var = fold_build2 (MULT_EXPR, type, var0, op1);
604 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
605 return true;
606
607 case ADDR_EXPR:
608 {
609 tree base, poffset;
610 HOST_WIDE_INT pbitsize, pbitpos;
611 enum machine_mode pmode;
612 int punsignedp, pvolatilep;
613
614 op0 = TREE_OPERAND (op0, 0);
615 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
616 &pmode, &punsignedp, &pvolatilep, false);
617
618 if (pbitpos % BITS_PER_UNIT != 0)
619 return false;
620 base = build_fold_addr_expr (base);
621 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
622
623 if (poffset)
624 {
625 split_constant_offset (poffset, &poffset, &off1);
626 off0 = size_binop (PLUS_EXPR, off0, off1);
627 if (POINTER_TYPE_P (TREE_TYPE (base)))
628 base = fold_build_pointer_plus (base, poffset);
629 else
630 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
631 fold_convert (TREE_TYPE (base), poffset));
632 }
633
634 var0 = fold_convert (type, base);
635
636 /* If variable length types are involved, punt, otherwise casts
637 might be converted into ARRAY_REFs in gimplify_conversion.
638 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
639 possibly no longer appears in current GIMPLE, might resurface.
640 This perhaps could run
641 if (CONVERT_EXPR_P (var0))
642 {
643 gimplify_conversion (&var0);
644 // Attempt to fill in any within var0 found ARRAY_REF's
645 // element size from corresponding op embedded ARRAY_REF,
646 // if unsuccessful, just punt.
647 } */
648 while (POINTER_TYPE_P (type))
649 type = TREE_TYPE (type);
650 if (int_size_in_bytes (type) < 0)
651 return false;
652
653 *var = var0;
654 *off = off0;
655 return true;
656 }
657
658 case SSA_NAME:
659 {
660 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
661 enum tree_code subcode;
662
663 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
664 return false;
665
666 var0 = gimple_assign_rhs1 (def_stmt);
667 subcode = gimple_assign_rhs_code (def_stmt);
668 var1 = gimple_assign_rhs2 (def_stmt);
669
670 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
671 }
672 CASE_CONVERT:
673 {
674 /* We must not introduce undefined overflow, and we must not change the value.
675 Hence we're okay if the inner type doesn't overflow to start with
676 (pointer or signed), the outer type also is an integer or pointer
677 and the outer precision is at least as large as the inner. */
678 tree itype = TREE_TYPE (op0);
679 if ((POINTER_TYPE_P (itype)
680 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
681 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
682 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
683 {
684 split_constant_offset (op0, &var0, off);
685 *var = fold_convert (type, var0);
686 return true;
687 }
688 return false;
689 }
690
691 default:
692 return false;
693 }
694 }
695
696 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
697 will be ssizetype. */
698
699 void
700 split_constant_offset (tree exp, tree *var, tree *off)
701 {
702 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
703 enum tree_code code;
704
705 *var = exp;
706 *off = ssize_int (0);
707 STRIP_NOPS (exp);
708
709 if (tree_is_chrec (exp)
710 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
711 return;
712
713 otype = TREE_TYPE (exp);
714 code = TREE_CODE (exp);
715 extract_ops_from_tree (exp, &code, &op0, &op1);
716 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
717 {
718 *var = fold_convert (type, e);
719 *off = o;
720 }
721 }
722
723 /* Returns the address ADDR of an object in a canonical shape (without nop
724 casts, and with type of pointer to the object). */
725
726 static tree
727 canonicalize_base_object_address (tree addr)
728 {
729 tree orig = addr;
730
731 STRIP_NOPS (addr);
732
733 /* The base address may be obtained by casting from integer, in that case
734 keep the cast. */
735 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
736 return orig;
737
738 if (TREE_CODE (addr) != ADDR_EXPR)
739 return addr;
740
741 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
742 }
743
744 /* Analyzes the behavior of the memory reference DR in the innermost loop or
745 basic block that contains it. Returns true if analysis succeed or false
746 otherwise. */
747
748 bool
749 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
750 {
751 gimple stmt = DR_STMT (dr);
752 struct loop *loop = loop_containing_stmt (stmt);
753 tree ref = DR_REF (dr);
754 HOST_WIDE_INT pbitsize, pbitpos;
755 tree base, poffset;
756 enum machine_mode pmode;
757 int punsignedp, pvolatilep;
758 affine_iv base_iv, offset_iv;
759 tree init, dinit, step;
760 bool in_loop = (loop && loop->num);
761
762 if (dump_file && (dump_flags & TDF_DETAILS))
763 fprintf (dump_file, "analyze_innermost: ");
764
765 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
766 &pmode, &punsignedp, &pvolatilep, false);
767 gcc_assert (base != NULL_TREE);
768
769 if (pbitpos % BITS_PER_UNIT != 0)
770 {
771 if (dump_file && (dump_flags & TDF_DETAILS))
772 fprintf (dump_file, "failed: bit offset alignment.\n");
773 return false;
774 }
775
776 if (TREE_CODE (base) == MEM_REF)
777 {
778 if (!integer_zerop (TREE_OPERAND (base, 1)))
779 {
780 double_int moff = mem_ref_offset (base);
781 tree mofft = double_int_to_tree (sizetype, moff);
782 if (!poffset)
783 poffset = mofft;
784 else
785 poffset = size_binop (PLUS_EXPR, poffset, mofft);
786 }
787 base = TREE_OPERAND (base, 0);
788 }
789 else
790 base = build_fold_addr_expr (base);
791
792 if (in_loop)
793 {
794 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
795 nest ? true : false))
796 {
797 if (nest)
798 {
799 if (dump_file && (dump_flags & TDF_DETAILS))
800 fprintf (dump_file, "failed: evolution of base is not"
801 " affine.\n");
802 return false;
803 }
804 else
805 {
806 base_iv.base = base;
807 base_iv.step = ssize_int (0);
808 base_iv.no_overflow = true;
809 }
810 }
811 }
812 else
813 {
814 base_iv.base = base;
815 base_iv.step = ssize_int (0);
816 base_iv.no_overflow = true;
817 }
818
819 if (!poffset)
820 {
821 offset_iv.base = ssize_int (0);
822 offset_iv.step = ssize_int (0);
823 }
824 else
825 {
826 if (!in_loop)
827 {
828 offset_iv.base = poffset;
829 offset_iv.step = ssize_int (0);
830 }
831 else if (!simple_iv (loop, loop_containing_stmt (stmt),
832 poffset, &offset_iv,
833 nest ? true : false))
834 {
835 if (nest)
836 {
837 if (dump_file && (dump_flags & TDF_DETAILS))
838 fprintf (dump_file, "failed: evolution of offset is not"
839 " affine.\n");
840 return false;
841 }
842 else
843 {
844 offset_iv.base = poffset;
845 offset_iv.step = ssize_int (0);
846 }
847 }
848 }
849
850 init = ssize_int (pbitpos / BITS_PER_UNIT);
851 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
852 init = size_binop (PLUS_EXPR, init, dinit);
853 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
854 init = size_binop (PLUS_EXPR, init, dinit);
855
856 step = size_binop (PLUS_EXPR,
857 fold_convert (ssizetype, base_iv.step),
858 fold_convert (ssizetype, offset_iv.step));
859
860 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
861
862 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
863 DR_INIT (dr) = init;
864 DR_STEP (dr) = step;
865
866 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
867
868 if (dump_file && (dump_flags & TDF_DETAILS))
869 fprintf (dump_file, "success.\n");
870
871 return true;
872 }
873
874 /* Determines the base object and the list of indices of memory reference
875 DR, analyzed in LOOP and instantiated in loop nest NEST. */
876
877 static void
878 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
879 {
880 vec<tree> access_fns = vNULL;
881 tree ref, op;
882 tree base, off, access_fn;
883 basic_block before_loop;
884
885 /* If analyzing a basic-block there are no indices to analyze
886 and thus no access functions. */
887 if (!nest)
888 {
889 DR_BASE_OBJECT (dr) = DR_REF (dr);
890 DR_ACCESS_FNS (dr).create (0);
891 return;
892 }
893
894 ref = DR_REF (dr);
895 before_loop = block_before_loop (nest);
896
897 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
898 into a two element array with a constant index. The base is
899 then just the immediate underlying object. */
900 if (TREE_CODE (ref) == REALPART_EXPR)
901 {
902 ref = TREE_OPERAND (ref, 0);
903 access_fns.safe_push (integer_zero_node);
904 }
905 else if (TREE_CODE (ref) == IMAGPART_EXPR)
906 {
907 ref = TREE_OPERAND (ref, 0);
908 access_fns.safe_push (integer_one_node);
909 }
910
911 /* Analyze access functions of dimensions we know to be independent. */
912 while (handled_component_p (ref))
913 {
914 if (TREE_CODE (ref) == ARRAY_REF)
915 {
916 op = TREE_OPERAND (ref, 1);
917 access_fn = analyze_scalar_evolution (loop, op);
918 access_fn = instantiate_scev (before_loop, loop, access_fn);
919 access_fns.safe_push (access_fn);
920 }
921 else if (TREE_CODE (ref) == COMPONENT_REF
922 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
923 {
924 /* For COMPONENT_REFs of records (but not unions!) use the
925 FIELD_DECL offset as constant access function so we can
926 disambiguate a[i].f1 and a[i].f2. */
927 tree off = component_ref_field_offset (ref);
928 off = size_binop (PLUS_EXPR,
929 size_binop (MULT_EXPR,
930 fold_convert (bitsizetype, off),
931 bitsize_int (BITS_PER_UNIT)),
932 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
933 access_fns.safe_push (off);
934 }
935 else
936 /* If we have an unhandled component we could not translate
937 to an access function stop analyzing. We have determined
938 our base object in this case. */
939 break;
940
941 ref = TREE_OPERAND (ref, 0);
942 }
943
944 /* If the address operand of a MEM_REF base has an evolution in the
945 analyzed nest, add it as an additional independent access-function. */
946 if (TREE_CODE (ref) == MEM_REF)
947 {
948 op = TREE_OPERAND (ref, 0);
949 access_fn = analyze_scalar_evolution (loop, op);
950 access_fn = instantiate_scev (before_loop, loop, access_fn);
951 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
952 {
953 tree orig_type;
954 tree memoff = TREE_OPERAND (ref, 1);
955 base = initial_condition (access_fn);
956 orig_type = TREE_TYPE (base);
957 STRIP_USELESS_TYPE_CONVERSION (base);
958 split_constant_offset (base, &base, &off);
959 /* Fold the MEM_REF offset into the evolutions initial
960 value to make more bases comparable. */
961 if (!integer_zerop (memoff))
962 {
963 off = size_binop (PLUS_EXPR, off,
964 fold_convert (ssizetype, memoff));
965 memoff = build_int_cst (TREE_TYPE (memoff), 0);
966 }
967 access_fn = chrec_replace_initial_condition
968 (access_fn, fold_convert (orig_type, off));
969 /* ??? This is still not a suitable base object for
970 dr_may_alias_p - the base object needs to be an
971 access that covers the object as whole. With
972 an evolution in the pointer this cannot be
973 guaranteed.
974 As a band-aid, mark the access so we can special-case
975 it in dr_may_alias_p. */
976 ref = fold_build2_loc (EXPR_LOCATION (ref),
977 MEM_REF, TREE_TYPE (ref),
978 base, memoff);
979 DR_UNCONSTRAINED_BASE (dr) = true;
980 access_fns.safe_push (access_fn);
981 }
982 }
983 else if (DECL_P (ref))
984 {
985 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
986 ref = build2 (MEM_REF, TREE_TYPE (ref),
987 build_fold_addr_expr (ref),
988 build_int_cst (reference_alias_ptr_type (ref), 0));
989 }
990
991 DR_BASE_OBJECT (dr) = ref;
992 DR_ACCESS_FNS (dr) = access_fns;
993 }
994
995 /* Extracts the alias analysis information from the memory reference DR. */
996
997 static void
998 dr_analyze_alias (struct data_reference *dr)
999 {
1000 tree ref = DR_REF (dr);
1001 tree base = get_base_address (ref), addr;
1002
1003 if (INDIRECT_REF_P (base)
1004 || TREE_CODE (base) == MEM_REF)
1005 {
1006 addr = TREE_OPERAND (base, 0);
1007 if (TREE_CODE (addr) == SSA_NAME)
1008 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1009 }
1010 }
1011
1012 /* Frees data reference DR. */
1013
1014 void
1015 free_data_ref (data_reference_p dr)
1016 {
1017 DR_ACCESS_FNS (dr).release ();
1018 free (dr);
1019 }
1020
1021 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1022 is read if IS_READ is true, write otherwise. Returns the
1023 data_reference description of MEMREF. NEST is the outermost loop
1024 in which the reference should be instantiated, LOOP is the loop in
1025 which the data reference should be analyzed. */
1026
1027 struct data_reference *
1028 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
1029 bool is_read)
1030 {
1031 struct data_reference *dr;
1032
1033 if (dump_file && (dump_flags & TDF_DETAILS))
1034 {
1035 fprintf (dump_file, "Creating dr for ");
1036 print_generic_expr (dump_file, memref, TDF_SLIM);
1037 fprintf (dump_file, "\n");
1038 }
1039
1040 dr = XCNEW (struct data_reference);
1041 DR_STMT (dr) = stmt;
1042 DR_REF (dr) = memref;
1043 DR_IS_READ (dr) = is_read;
1044
1045 dr_analyze_innermost (dr, nest);
1046 dr_analyze_indices (dr, nest, loop);
1047 dr_analyze_alias (dr);
1048
1049 if (dump_file && (dump_flags & TDF_DETAILS))
1050 {
1051 unsigned i;
1052 fprintf (dump_file, "\tbase_address: ");
1053 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1054 fprintf (dump_file, "\n\toffset from base address: ");
1055 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1056 fprintf (dump_file, "\n\tconstant offset from base address: ");
1057 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1058 fprintf (dump_file, "\n\tstep: ");
1059 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1060 fprintf (dump_file, "\n\taligned to: ");
1061 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1062 fprintf (dump_file, "\n\tbase_object: ");
1063 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1064 fprintf (dump_file, "\n");
1065 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1066 {
1067 fprintf (dump_file, "\tAccess function %d: ", i);
1068 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1069 }
1070 }
1071
1072 return dr;
1073 }
1074
1075 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1076 expressions. */
1077 static bool
1078 dr_equal_offsets_p1 (tree offset1, tree offset2)
1079 {
1080 bool res;
1081
1082 STRIP_NOPS (offset1);
1083 STRIP_NOPS (offset2);
1084
1085 if (offset1 == offset2)
1086 return true;
1087
1088 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1089 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1090 return false;
1091
1092 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1093 TREE_OPERAND (offset2, 0));
1094
1095 if (!res || !BINARY_CLASS_P (offset1))
1096 return res;
1097
1098 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1099 TREE_OPERAND (offset2, 1));
1100
1101 return res;
1102 }
1103
1104 /* Check if DRA and DRB have equal offsets. */
1105 bool
1106 dr_equal_offsets_p (struct data_reference *dra,
1107 struct data_reference *drb)
1108 {
1109 tree offset1, offset2;
1110
1111 offset1 = DR_OFFSET (dra);
1112 offset2 = DR_OFFSET (drb);
1113
1114 return dr_equal_offsets_p1 (offset1, offset2);
1115 }
1116
1117 /* Returns true if FNA == FNB. */
1118
1119 static bool
1120 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1121 {
1122 unsigned i, n = fna.length ();
1123
1124 if (n != fnb.length ())
1125 return false;
1126
1127 for (i = 0; i < n; i++)
1128 if (!operand_equal_p (fna[i], fnb[i], 0))
1129 return false;
1130
1131 return true;
1132 }
1133
1134 /* If all the functions in CF are the same, returns one of them,
1135 otherwise returns NULL. */
1136
1137 static affine_fn
1138 common_affine_function (conflict_function *cf)
1139 {
1140 unsigned i;
1141 affine_fn comm;
1142
1143 if (!CF_NONTRIVIAL_P (cf))
1144 return affine_fn ();
1145
1146 comm = cf->fns[0];
1147
1148 for (i = 1; i < cf->n; i++)
1149 if (!affine_function_equal_p (comm, cf->fns[i]))
1150 return affine_fn ();
1151
1152 return comm;
1153 }
1154
1155 /* Returns the base of the affine function FN. */
1156
1157 static tree
1158 affine_function_base (affine_fn fn)
1159 {
1160 return fn[0];
1161 }
1162
1163 /* Returns true if FN is a constant. */
1164
1165 static bool
1166 affine_function_constant_p (affine_fn fn)
1167 {
1168 unsigned i;
1169 tree coef;
1170
1171 for (i = 1; fn.iterate (i, &coef); i++)
1172 if (!integer_zerop (coef))
1173 return false;
1174
1175 return true;
1176 }
1177
1178 /* Returns true if FN is the zero constant function. */
1179
1180 static bool
1181 affine_function_zero_p (affine_fn fn)
1182 {
1183 return (integer_zerop (affine_function_base (fn))
1184 && affine_function_constant_p (fn));
1185 }
1186
1187 /* Returns a signed integer type with the largest precision from TA
1188 and TB. */
1189
1190 static tree
1191 signed_type_for_types (tree ta, tree tb)
1192 {
1193 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1194 return signed_type_for (ta);
1195 else
1196 return signed_type_for (tb);
1197 }
1198
1199 /* Applies operation OP on affine functions FNA and FNB, and returns the
1200 result. */
1201
1202 static affine_fn
1203 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1204 {
1205 unsigned i, n, m;
1206 affine_fn ret;
1207 tree coef;
1208
1209 if (fnb.length () > fna.length ())
1210 {
1211 n = fna.length ();
1212 m = fnb.length ();
1213 }
1214 else
1215 {
1216 n = fnb.length ();
1217 m = fna.length ();
1218 }
1219
1220 ret.create (m);
1221 for (i = 0; i < n; i++)
1222 {
1223 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1224 TREE_TYPE (fnb[i]));
1225 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1226 }
1227
1228 for (; fna.iterate (i, &coef); i++)
1229 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1230 coef, integer_zero_node));
1231 for (; fnb.iterate (i, &coef); i++)
1232 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1233 integer_zero_node, coef));
1234
1235 return ret;
1236 }
1237
1238 /* Returns the sum of affine functions FNA and FNB. */
1239
1240 static affine_fn
1241 affine_fn_plus (affine_fn fna, affine_fn fnb)
1242 {
1243 return affine_fn_op (PLUS_EXPR, fna, fnb);
1244 }
1245
1246 /* Returns the difference of affine functions FNA and FNB. */
1247
1248 static affine_fn
1249 affine_fn_minus (affine_fn fna, affine_fn fnb)
1250 {
1251 return affine_fn_op (MINUS_EXPR, fna, fnb);
1252 }
1253
1254 /* Frees affine function FN. */
1255
1256 static void
1257 affine_fn_free (affine_fn fn)
1258 {
1259 fn.release ();
1260 }
1261
1262 /* Determine for each subscript in the data dependence relation DDR
1263 the distance. */
1264
1265 static void
1266 compute_subscript_distance (struct data_dependence_relation *ddr)
1267 {
1268 conflict_function *cf_a, *cf_b;
1269 affine_fn fn_a, fn_b, diff;
1270
1271 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1272 {
1273 unsigned int i;
1274
1275 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1276 {
1277 struct subscript *subscript;
1278
1279 subscript = DDR_SUBSCRIPT (ddr, i);
1280 cf_a = SUB_CONFLICTS_IN_A (subscript);
1281 cf_b = SUB_CONFLICTS_IN_B (subscript);
1282
1283 fn_a = common_affine_function (cf_a);
1284 fn_b = common_affine_function (cf_b);
1285 if (!fn_a.exists () || !fn_b.exists ())
1286 {
1287 SUB_DISTANCE (subscript) = chrec_dont_know;
1288 return;
1289 }
1290 diff = affine_fn_minus (fn_a, fn_b);
1291
1292 if (affine_function_constant_p (diff))
1293 SUB_DISTANCE (subscript) = affine_function_base (diff);
1294 else
1295 SUB_DISTANCE (subscript) = chrec_dont_know;
1296
1297 affine_fn_free (diff);
1298 }
1299 }
1300 }
1301
1302 /* Returns the conflict function for "unknown". */
1303
1304 static conflict_function *
1305 conflict_fn_not_known (void)
1306 {
1307 conflict_function *fn = XCNEW (conflict_function);
1308 fn->n = NOT_KNOWN;
1309
1310 return fn;
1311 }
1312
1313 /* Returns the conflict function for "independent". */
1314
1315 static conflict_function *
1316 conflict_fn_no_dependence (void)
1317 {
1318 conflict_function *fn = XCNEW (conflict_function);
1319 fn->n = NO_DEPENDENCE;
1320
1321 return fn;
1322 }
1323
1324 /* Returns true if the address of OBJ is invariant in LOOP. */
1325
1326 static bool
1327 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1328 {
1329 while (handled_component_p (obj))
1330 {
1331 if (TREE_CODE (obj) == ARRAY_REF)
1332 {
1333 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1334 need to check the stride and the lower bound of the reference. */
1335 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1336 loop->num)
1337 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1338 loop->num))
1339 return false;
1340 }
1341 else if (TREE_CODE (obj) == COMPONENT_REF)
1342 {
1343 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1344 loop->num))
1345 return false;
1346 }
1347 obj = TREE_OPERAND (obj, 0);
1348 }
1349
1350 if (!INDIRECT_REF_P (obj)
1351 && TREE_CODE (obj) != MEM_REF)
1352 return true;
1353
1354 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1355 loop->num);
1356 }
1357
1358 /* Returns false if we can prove that data references A and B do not alias,
1359 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1360 considered. */
1361
1362 bool
1363 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1364 bool loop_nest)
1365 {
1366 tree addr_a = DR_BASE_OBJECT (a);
1367 tree addr_b = DR_BASE_OBJECT (b);
1368
1369 /* If we are not processing a loop nest but scalar code we
1370 do not need to care about possible cross-iteration dependences
1371 and thus can process the full original reference. Do so,
1372 similar to how loop invariant motion applies extra offset-based
1373 disambiguation. */
1374 if (!loop_nest)
1375 {
1376 aff_tree off1, off2;
1377 double_int size1, size2;
1378 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1379 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1380 aff_combination_scale (&off1, double_int_minus_one);
1381 aff_combination_add (&off2, &off1);
1382 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1383 return false;
1384 }
1385
1386 /* If we had an evolution in a MEM_REF BASE_OBJECT we do not know
1387 the size of the base-object. So we cannot do any offset/overlap
1388 based analysis but have to rely on points-to information only. */
1389 if (TREE_CODE (addr_a) == MEM_REF
1390 && DR_UNCONSTRAINED_BASE (a))
1391 {
1392 if (TREE_CODE (addr_b) == MEM_REF
1393 && DR_UNCONSTRAINED_BASE (b))
1394 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1395 TREE_OPERAND (addr_b, 0));
1396 else
1397 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1398 build_fold_addr_expr (addr_b));
1399 }
1400 else if (TREE_CODE (addr_b) == MEM_REF
1401 && DR_UNCONSTRAINED_BASE (b))
1402 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1403 TREE_OPERAND (addr_b, 0));
1404
1405 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1406 that is being subsetted in the loop nest. */
1407 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1408 return refs_output_dependent_p (addr_a, addr_b);
1409 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1410 return refs_anti_dependent_p (addr_a, addr_b);
1411 return refs_may_alias_p (addr_a, addr_b);
1412 }
1413
1414 /* Initialize a data dependence relation between data accesses A and
1415 B. NB_LOOPS is the number of loops surrounding the references: the
1416 size of the classic distance/direction vectors. */
1417
1418 struct data_dependence_relation *
1419 initialize_data_dependence_relation (struct data_reference *a,
1420 struct data_reference *b,
1421 vec<loop_p> loop_nest)
1422 {
1423 struct data_dependence_relation *res;
1424 unsigned int i;
1425
1426 res = XNEW (struct data_dependence_relation);
1427 DDR_A (res) = a;
1428 DDR_B (res) = b;
1429 DDR_LOOP_NEST (res).create (0);
1430 DDR_REVERSED_P (res) = false;
1431 DDR_SUBSCRIPTS (res).create (0);
1432 DDR_DIR_VECTS (res).create (0);
1433 DDR_DIST_VECTS (res).create (0);
1434
1435 if (a == NULL || b == NULL)
1436 {
1437 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1438 return res;
1439 }
1440
1441 /* If the data references do not alias, then they are independent. */
1442 if (!dr_may_alias_p (a, b, loop_nest.exists ()))
1443 {
1444 DDR_ARE_DEPENDENT (res) = chrec_known;
1445 return res;
1446 }
1447
1448 /* The case where the references are exactly the same. */
1449 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1450 {
1451 if (loop_nest.exists ()
1452 && !object_address_invariant_in_loop_p (loop_nest[0],
1453 DR_BASE_OBJECT (a)))
1454 {
1455 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1456 return res;
1457 }
1458 DDR_AFFINE_P (res) = true;
1459 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1460 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1461 DDR_LOOP_NEST (res) = loop_nest;
1462 DDR_INNER_LOOP (res) = 0;
1463 DDR_SELF_REFERENCE (res) = true;
1464 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1465 {
1466 struct subscript *subscript;
1467
1468 subscript = XNEW (struct subscript);
1469 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1470 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1471 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1472 SUB_DISTANCE (subscript) = chrec_dont_know;
1473 DDR_SUBSCRIPTS (res).safe_push (subscript);
1474 }
1475 return res;
1476 }
1477
1478 /* If the references do not access the same object, we do not know
1479 whether they alias or not. */
1480 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1481 {
1482 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1483 return res;
1484 }
1485
1486 /* If the base of the object is not invariant in the loop nest, we cannot
1487 analyze it. TODO -- in fact, it would suffice to record that there may
1488 be arbitrary dependences in the loops where the base object varies. */
1489 if (loop_nest.exists ()
1490 && !object_address_invariant_in_loop_p (loop_nest[0],
1491 DR_BASE_OBJECT (a)))
1492 {
1493 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1494 return res;
1495 }
1496
1497 /* If the number of dimensions of the access to not agree we can have
1498 a pointer access to a component of the array element type and an
1499 array access while the base-objects are still the same. Punt. */
1500 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1501 {
1502 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1503 return res;
1504 }
1505
1506 DDR_AFFINE_P (res) = true;
1507 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1508 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1509 DDR_LOOP_NEST (res) = loop_nest;
1510 DDR_INNER_LOOP (res) = 0;
1511 DDR_SELF_REFERENCE (res) = false;
1512
1513 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1514 {
1515 struct subscript *subscript;
1516
1517 subscript = XNEW (struct subscript);
1518 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1519 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1520 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1521 SUB_DISTANCE (subscript) = chrec_dont_know;
1522 DDR_SUBSCRIPTS (res).safe_push (subscript);
1523 }
1524
1525 return res;
1526 }
1527
1528 /* Frees memory used by the conflict function F. */
1529
1530 static void
1531 free_conflict_function (conflict_function *f)
1532 {
1533 unsigned i;
1534
1535 if (CF_NONTRIVIAL_P (f))
1536 {
1537 for (i = 0; i < f->n; i++)
1538 affine_fn_free (f->fns[i]);
1539 }
1540 free (f);
1541 }
1542
1543 /* Frees memory used by SUBSCRIPTS. */
1544
1545 static void
1546 free_subscripts (vec<subscript_p> subscripts)
1547 {
1548 unsigned i;
1549 subscript_p s;
1550
1551 FOR_EACH_VEC_ELT (subscripts, i, s)
1552 {
1553 free_conflict_function (s->conflicting_iterations_in_a);
1554 free_conflict_function (s->conflicting_iterations_in_b);
1555 free (s);
1556 }
1557 subscripts.release ();
1558 }
1559
1560 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1561 description. */
1562
1563 static inline void
1564 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1565 tree chrec)
1566 {
1567 DDR_ARE_DEPENDENT (ddr) = chrec;
1568 free_subscripts (DDR_SUBSCRIPTS (ddr));
1569 DDR_SUBSCRIPTS (ddr).create (0);
1570 }
1571
1572 /* The dependence relation DDR cannot be represented by a distance
1573 vector. */
1574
1575 static inline void
1576 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1577 {
1578 if (dump_file && (dump_flags & TDF_DETAILS))
1579 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1580
1581 DDR_AFFINE_P (ddr) = false;
1582 }
1583
1584 \f
1585
1586 /* This section contains the classic Banerjee tests. */
1587
1588 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1589 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1590
1591 static inline bool
1592 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1593 {
1594 return (evolution_function_is_constant_p (chrec_a)
1595 && evolution_function_is_constant_p (chrec_b));
1596 }
1597
1598 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1599 variable, i.e., if the SIV (Single Index Variable) test is true. */
1600
1601 static bool
1602 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1603 {
1604 if ((evolution_function_is_constant_p (chrec_a)
1605 && evolution_function_is_univariate_p (chrec_b))
1606 || (evolution_function_is_constant_p (chrec_b)
1607 && evolution_function_is_univariate_p (chrec_a)))
1608 return true;
1609
1610 if (evolution_function_is_univariate_p (chrec_a)
1611 && evolution_function_is_univariate_p (chrec_b))
1612 {
1613 switch (TREE_CODE (chrec_a))
1614 {
1615 case POLYNOMIAL_CHREC:
1616 switch (TREE_CODE (chrec_b))
1617 {
1618 case POLYNOMIAL_CHREC:
1619 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1620 return false;
1621
1622 default:
1623 return true;
1624 }
1625
1626 default:
1627 return true;
1628 }
1629 }
1630
1631 return false;
1632 }
1633
1634 /* Creates a conflict function with N dimensions. The affine functions
1635 in each dimension follow. */
1636
1637 static conflict_function *
1638 conflict_fn (unsigned n, ...)
1639 {
1640 unsigned i;
1641 conflict_function *ret = XCNEW (conflict_function);
1642 va_list ap;
1643
1644 gcc_assert (0 < n && n <= MAX_DIM);
1645 va_start (ap, n);
1646
1647 ret->n = n;
1648 for (i = 0; i < n; i++)
1649 ret->fns[i] = va_arg (ap, affine_fn);
1650 va_end (ap);
1651
1652 return ret;
1653 }
1654
1655 /* Returns constant affine function with value CST. */
1656
1657 static affine_fn
1658 affine_fn_cst (tree cst)
1659 {
1660 affine_fn fn;
1661 fn.create (1);
1662 fn.quick_push (cst);
1663 return fn;
1664 }
1665
1666 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1667
1668 static affine_fn
1669 affine_fn_univar (tree cst, unsigned dim, tree coef)
1670 {
1671 affine_fn fn;
1672 fn.create (dim + 1);
1673 unsigned i;
1674
1675 gcc_assert (dim > 0);
1676 fn.quick_push (cst);
1677 for (i = 1; i < dim; i++)
1678 fn.quick_push (integer_zero_node);
1679 fn.quick_push (coef);
1680 return fn;
1681 }
1682
1683 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1684 *OVERLAPS_B are initialized to the functions that describe the
1685 relation between the elements accessed twice by CHREC_A and
1686 CHREC_B. For k >= 0, the following property is verified:
1687
1688 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1689
1690 static void
1691 analyze_ziv_subscript (tree chrec_a,
1692 tree chrec_b,
1693 conflict_function **overlaps_a,
1694 conflict_function **overlaps_b,
1695 tree *last_conflicts)
1696 {
1697 tree type, difference;
1698 dependence_stats.num_ziv++;
1699
1700 if (dump_file && (dump_flags & TDF_DETAILS))
1701 fprintf (dump_file, "(analyze_ziv_subscript \n");
1702
1703 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1704 chrec_a = chrec_convert (type, chrec_a, NULL);
1705 chrec_b = chrec_convert (type, chrec_b, NULL);
1706 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1707
1708 switch (TREE_CODE (difference))
1709 {
1710 case INTEGER_CST:
1711 if (integer_zerop (difference))
1712 {
1713 /* The difference is equal to zero: the accessed index
1714 overlaps for each iteration in the loop. */
1715 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1716 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1717 *last_conflicts = chrec_dont_know;
1718 dependence_stats.num_ziv_dependent++;
1719 }
1720 else
1721 {
1722 /* The accesses do not overlap. */
1723 *overlaps_a = conflict_fn_no_dependence ();
1724 *overlaps_b = conflict_fn_no_dependence ();
1725 *last_conflicts = integer_zero_node;
1726 dependence_stats.num_ziv_independent++;
1727 }
1728 break;
1729
1730 default:
1731 /* We're not sure whether the indexes overlap. For the moment,
1732 conservatively answer "don't know". */
1733 if (dump_file && (dump_flags & TDF_DETAILS))
1734 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1735
1736 *overlaps_a = conflict_fn_not_known ();
1737 *overlaps_b = conflict_fn_not_known ();
1738 *last_conflicts = chrec_dont_know;
1739 dependence_stats.num_ziv_unimplemented++;
1740 break;
1741 }
1742
1743 if (dump_file && (dump_flags & TDF_DETAILS))
1744 fprintf (dump_file, ")\n");
1745 }
1746
1747 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1748 and only if it fits to the int type. If this is not the case, or the
1749 bound on the number of iterations of LOOP could not be derived, returns
1750 chrec_dont_know. */
1751
1752 static tree
1753 max_stmt_executions_tree (struct loop *loop)
1754 {
1755 double_int nit;
1756
1757 if (!max_stmt_executions (loop, &nit))
1758 return chrec_dont_know;
1759
1760 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1761 return chrec_dont_know;
1762
1763 return double_int_to_tree (unsigned_type_node, nit);
1764 }
1765
1766 /* Determine whether the CHREC is always positive/negative. If the expression
1767 cannot be statically analyzed, return false, otherwise set the answer into
1768 VALUE. */
1769
1770 static bool
1771 chrec_is_positive (tree chrec, bool *value)
1772 {
1773 bool value0, value1, value2;
1774 tree end_value, nb_iter;
1775
1776 switch (TREE_CODE (chrec))
1777 {
1778 case POLYNOMIAL_CHREC:
1779 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1780 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1781 return false;
1782
1783 /* FIXME -- overflows. */
1784 if (value0 == value1)
1785 {
1786 *value = value0;
1787 return true;
1788 }
1789
1790 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1791 and the proof consists in showing that the sign never
1792 changes during the execution of the loop, from 0 to
1793 loop->nb_iterations. */
1794 if (!evolution_function_is_affine_p (chrec))
1795 return false;
1796
1797 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1798 if (chrec_contains_undetermined (nb_iter))
1799 return false;
1800
1801 #if 0
1802 /* TODO -- If the test is after the exit, we may decrease the number of
1803 iterations by one. */
1804 if (after_exit)
1805 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1806 #endif
1807
1808 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1809
1810 if (!chrec_is_positive (end_value, &value2))
1811 return false;
1812
1813 *value = value0;
1814 return value0 == value1;
1815
1816 case INTEGER_CST:
1817 switch (tree_int_cst_sgn (chrec))
1818 {
1819 case -1:
1820 *value = false;
1821 break;
1822 case 1:
1823 *value = true;
1824 break;
1825 default:
1826 return false;
1827 }
1828 return true;
1829
1830 default:
1831 return false;
1832 }
1833 }
1834
1835
1836 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1837 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1838 *OVERLAPS_B are initialized to the functions that describe the
1839 relation between the elements accessed twice by CHREC_A and
1840 CHREC_B. For k >= 0, the following property is verified:
1841
1842 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1843
1844 static void
1845 analyze_siv_subscript_cst_affine (tree chrec_a,
1846 tree chrec_b,
1847 conflict_function **overlaps_a,
1848 conflict_function **overlaps_b,
1849 tree *last_conflicts)
1850 {
1851 bool value0, value1, value2;
1852 tree type, difference, tmp;
1853
1854 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1855 chrec_a = chrec_convert (type, chrec_a, NULL);
1856 chrec_b = chrec_convert (type, chrec_b, NULL);
1857 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1858
1859 /* Special case overlap in the first iteration. */
1860 if (integer_zerop (difference))
1861 {
1862 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1863 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1864 *last_conflicts = integer_one_node;
1865 return;
1866 }
1867
1868 if (!chrec_is_positive (initial_condition (difference), &value0))
1869 {
1870 if (dump_file && (dump_flags & TDF_DETAILS))
1871 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1872
1873 dependence_stats.num_siv_unimplemented++;
1874 *overlaps_a = conflict_fn_not_known ();
1875 *overlaps_b = conflict_fn_not_known ();
1876 *last_conflicts = chrec_dont_know;
1877 return;
1878 }
1879 else
1880 {
1881 if (value0 == false)
1882 {
1883 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1884 {
1885 if (dump_file && (dump_flags & TDF_DETAILS))
1886 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1887
1888 *overlaps_a = conflict_fn_not_known ();
1889 *overlaps_b = conflict_fn_not_known ();
1890 *last_conflicts = chrec_dont_know;
1891 dependence_stats.num_siv_unimplemented++;
1892 return;
1893 }
1894 else
1895 {
1896 if (value1 == true)
1897 {
1898 /* Example:
1899 chrec_a = 12
1900 chrec_b = {10, +, 1}
1901 */
1902
1903 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1904 {
1905 HOST_WIDE_INT numiter;
1906 struct loop *loop = get_chrec_loop (chrec_b);
1907
1908 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1909 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1910 fold_build1 (ABS_EXPR, type, difference),
1911 CHREC_RIGHT (chrec_b));
1912 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1913 *last_conflicts = integer_one_node;
1914
1915
1916 /* Perform weak-zero siv test to see if overlap is
1917 outside the loop bounds. */
1918 numiter = max_stmt_executions_int (loop);
1919
1920 if (numiter >= 0
1921 && compare_tree_int (tmp, numiter) > 0)
1922 {
1923 free_conflict_function (*overlaps_a);
1924 free_conflict_function (*overlaps_b);
1925 *overlaps_a = conflict_fn_no_dependence ();
1926 *overlaps_b = conflict_fn_no_dependence ();
1927 *last_conflicts = integer_zero_node;
1928 dependence_stats.num_siv_independent++;
1929 return;
1930 }
1931 dependence_stats.num_siv_dependent++;
1932 return;
1933 }
1934
1935 /* When the step does not divide the difference, there are
1936 no overlaps. */
1937 else
1938 {
1939 *overlaps_a = conflict_fn_no_dependence ();
1940 *overlaps_b = conflict_fn_no_dependence ();
1941 *last_conflicts = integer_zero_node;
1942 dependence_stats.num_siv_independent++;
1943 return;
1944 }
1945 }
1946
1947 else
1948 {
1949 /* Example:
1950 chrec_a = 12
1951 chrec_b = {10, +, -1}
1952
1953 In this case, chrec_a will not overlap with chrec_b. */
1954 *overlaps_a = conflict_fn_no_dependence ();
1955 *overlaps_b = conflict_fn_no_dependence ();
1956 *last_conflicts = integer_zero_node;
1957 dependence_stats.num_siv_independent++;
1958 return;
1959 }
1960 }
1961 }
1962 else
1963 {
1964 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1965 {
1966 if (dump_file && (dump_flags & TDF_DETAILS))
1967 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1968
1969 *overlaps_a = conflict_fn_not_known ();
1970 *overlaps_b = conflict_fn_not_known ();
1971 *last_conflicts = chrec_dont_know;
1972 dependence_stats.num_siv_unimplemented++;
1973 return;
1974 }
1975 else
1976 {
1977 if (value2 == false)
1978 {
1979 /* Example:
1980 chrec_a = 3
1981 chrec_b = {10, +, -1}
1982 */
1983 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1984 {
1985 HOST_WIDE_INT numiter;
1986 struct loop *loop = get_chrec_loop (chrec_b);
1987
1988 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1989 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1990 CHREC_RIGHT (chrec_b));
1991 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1992 *last_conflicts = integer_one_node;
1993
1994 /* Perform weak-zero siv test to see if overlap is
1995 outside the loop bounds. */
1996 numiter = max_stmt_executions_int (loop);
1997
1998 if (numiter >= 0
1999 && compare_tree_int (tmp, numiter) > 0)
2000 {
2001 free_conflict_function (*overlaps_a);
2002 free_conflict_function (*overlaps_b);
2003 *overlaps_a = conflict_fn_no_dependence ();
2004 *overlaps_b = conflict_fn_no_dependence ();
2005 *last_conflicts = integer_zero_node;
2006 dependence_stats.num_siv_independent++;
2007 return;
2008 }
2009 dependence_stats.num_siv_dependent++;
2010 return;
2011 }
2012
2013 /* When the step does not divide the difference, there
2014 are no overlaps. */
2015 else
2016 {
2017 *overlaps_a = conflict_fn_no_dependence ();
2018 *overlaps_b = conflict_fn_no_dependence ();
2019 *last_conflicts = integer_zero_node;
2020 dependence_stats.num_siv_independent++;
2021 return;
2022 }
2023 }
2024 else
2025 {
2026 /* Example:
2027 chrec_a = 3
2028 chrec_b = {4, +, 1}
2029
2030 In this case, chrec_a will not overlap with chrec_b. */
2031 *overlaps_a = conflict_fn_no_dependence ();
2032 *overlaps_b = conflict_fn_no_dependence ();
2033 *last_conflicts = integer_zero_node;
2034 dependence_stats.num_siv_independent++;
2035 return;
2036 }
2037 }
2038 }
2039 }
2040 }
2041
2042 /* Helper recursive function for initializing the matrix A. Returns
2043 the initial value of CHREC. */
2044
2045 static tree
2046 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2047 {
2048 gcc_assert (chrec);
2049
2050 switch (TREE_CODE (chrec))
2051 {
2052 case POLYNOMIAL_CHREC:
2053 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2054
2055 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2056 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2057
2058 case PLUS_EXPR:
2059 case MULT_EXPR:
2060 case MINUS_EXPR:
2061 {
2062 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2063 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2064
2065 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2066 }
2067
2068 case NOP_EXPR:
2069 {
2070 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2071 return chrec_convert (chrec_type (chrec), op, NULL);
2072 }
2073
2074 case BIT_NOT_EXPR:
2075 {
2076 /* Handle ~X as -1 - X. */
2077 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2078 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2079 build_int_cst (TREE_TYPE (chrec), -1), op);
2080 }
2081
2082 case INTEGER_CST:
2083 return chrec;
2084
2085 default:
2086 gcc_unreachable ();
2087 return NULL_TREE;
2088 }
2089 }
2090
2091 #define FLOOR_DIV(x,y) ((x) / (y))
2092
2093 /* Solves the special case of the Diophantine equation:
2094 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2095
2096 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2097 number of iterations that loops X and Y run. The overlaps will be
2098 constructed as evolutions in dimension DIM. */
2099
2100 static void
2101 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2102 affine_fn *overlaps_a,
2103 affine_fn *overlaps_b,
2104 tree *last_conflicts, int dim)
2105 {
2106 if (((step_a > 0 && step_b > 0)
2107 || (step_a < 0 && step_b < 0)))
2108 {
2109 int step_overlaps_a, step_overlaps_b;
2110 int gcd_steps_a_b, last_conflict, tau2;
2111
2112 gcd_steps_a_b = gcd (step_a, step_b);
2113 step_overlaps_a = step_b / gcd_steps_a_b;
2114 step_overlaps_b = step_a / gcd_steps_a_b;
2115
2116 if (niter > 0)
2117 {
2118 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2119 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2120 last_conflict = tau2;
2121 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2122 }
2123 else
2124 *last_conflicts = chrec_dont_know;
2125
2126 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2127 build_int_cst (NULL_TREE,
2128 step_overlaps_a));
2129 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2130 build_int_cst (NULL_TREE,
2131 step_overlaps_b));
2132 }
2133
2134 else
2135 {
2136 *overlaps_a = affine_fn_cst (integer_zero_node);
2137 *overlaps_b = affine_fn_cst (integer_zero_node);
2138 *last_conflicts = integer_zero_node;
2139 }
2140 }
2141
2142 /* Solves the special case of a Diophantine equation where CHREC_A is
2143 an affine bivariate function, and CHREC_B is an affine univariate
2144 function. For example,
2145
2146 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2147
2148 has the following overlapping functions:
2149
2150 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2151 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2152 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2153
2154 FORNOW: This is a specialized implementation for a case occurring in
2155 a common benchmark. Implement the general algorithm. */
2156
2157 static void
2158 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2159 conflict_function **overlaps_a,
2160 conflict_function **overlaps_b,
2161 tree *last_conflicts)
2162 {
2163 bool xz_p, yz_p, xyz_p;
2164 int step_x, step_y, step_z;
2165 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2166 affine_fn overlaps_a_xz, overlaps_b_xz;
2167 affine_fn overlaps_a_yz, overlaps_b_yz;
2168 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2169 affine_fn ova1, ova2, ovb;
2170 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2171
2172 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2173 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2174 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2175
2176 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2177 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2178 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2179
2180 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2181 {
2182 if (dump_file && (dump_flags & TDF_DETAILS))
2183 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2184
2185 *overlaps_a = conflict_fn_not_known ();
2186 *overlaps_b = conflict_fn_not_known ();
2187 *last_conflicts = chrec_dont_know;
2188 return;
2189 }
2190
2191 niter = MIN (niter_x, niter_z);
2192 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2193 &overlaps_a_xz,
2194 &overlaps_b_xz,
2195 &last_conflicts_xz, 1);
2196 niter = MIN (niter_y, niter_z);
2197 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2198 &overlaps_a_yz,
2199 &overlaps_b_yz,
2200 &last_conflicts_yz, 2);
2201 niter = MIN (niter_x, niter_z);
2202 niter = MIN (niter_y, niter);
2203 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2204 &overlaps_a_xyz,
2205 &overlaps_b_xyz,
2206 &last_conflicts_xyz, 3);
2207
2208 xz_p = !integer_zerop (last_conflicts_xz);
2209 yz_p = !integer_zerop (last_conflicts_yz);
2210 xyz_p = !integer_zerop (last_conflicts_xyz);
2211
2212 if (xz_p || yz_p || xyz_p)
2213 {
2214 ova1 = affine_fn_cst (integer_zero_node);
2215 ova2 = affine_fn_cst (integer_zero_node);
2216 ovb = affine_fn_cst (integer_zero_node);
2217 if (xz_p)
2218 {
2219 affine_fn t0 = ova1;
2220 affine_fn t2 = ovb;
2221
2222 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2223 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2224 affine_fn_free (t0);
2225 affine_fn_free (t2);
2226 *last_conflicts = last_conflicts_xz;
2227 }
2228 if (yz_p)
2229 {
2230 affine_fn t0 = ova2;
2231 affine_fn t2 = ovb;
2232
2233 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2234 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2235 affine_fn_free (t0);
2236 affine_fn_free (t2);
2237 *last_conflicts = last_conflicts_yz;
2238 }
2239 if (xyz_p)
2240 {
2241 affine_fn t0 = ova1;
2242 affine_fn t2 = ova2;
2243 affine_fn t4 = ovb;
2244
2245 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2246 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2247 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2248 affine_fn_free (t0);
2249 affine_fn_free (t2);
2250 affine_fn_free (t4);
2251 *last_conflicts = last_conflicts_xyz;
2252 }
2253 *overlaps_a = conflict_fn (2, ova1, ova2);
2254 *overlaps_b = conflict_fn (1, ovb);
2255 }
2256 else
2257 {
2258 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2259 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2260 *last_conflicts = integer_zero_node;
2261 }
2262
2263 affine_fn_free (overlaps_a_xz);
2264 affine_fn_free (overlaps_b_xz);
2265 affine_fn_free (overlaps_a_yz);
2266 affine_fn_free (overlaps_b_yz);
2267 affine_fn_free (overlaps_a_xyz);
2268 affine_fn_free (overlaps_b_xyz);
2269 }
2270
2271 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2272
2273 static void
2274 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2275 int size)
2276 {
2277 memcpy (vec2, vec1, size * sizeof (*vec1));
2278 }
2279
2280 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2281
2282 static void
2283 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2284 int m, int n)
2285 {
2286 int i;
2287
2288 for (i = 0; i < m; i++)
2289 lambda_vector_copy (mat1[i], mat2[i], n);
2290 }
2291
2292 /* Store the N x N identity matrix in MAT. */
2293
2294 static void
2295 lambda_matrix_id (lambda_matrix mat, int size)
2296 {
2297 int i, j;
2298
2299 for (i = 0; i < size; i++)
2300 for (j = 0; j < size; j++)
2301 mat[i][j] = (i == j) ? 1 : 0;
2302 }
2303
2304 /* Return the first nonzero element of vector VEC1 between START and N.
2305 We must have START <= N. Returns N if VEC1 is the zero vector. */
2306
2307 static int
2308 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2309 {
2310 int j = start;
2311 while (j < n && vec1[j] == 0)
2312 j++;
2313 return j;
2314 }
2315
2316 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2317 R2 = R2 + CONST1 * R1. */
2318
2319 static void
2320 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2321 {
2322 int i;
2323
2324 if (const1 == 0)
2325 return;
2326
2327 for (i = 0; i < n; i++)
2328 mat[r2][i] += const1 * mat[r1][i];
2329 }
2330
2331 /* Swap rows R1 and R2 in matrix MAT. */
2332
2333 static void
2334 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2335 {
2336 lambda_vector row;
2337
2338 row = mat[r1];
2339 mat[r1] = mat[r2];
2340 mat[r2] = row;
2341 }
2342
2343 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2344 and store the result in VEC2. */
2345
2346 static void
2347 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2348 int size, int const1)
2349 {
2350 int i;
2351
2352 if (const1 == 0)
2353 lambda_vector_clear (vec2, size);
2354 else
2355 for (i = 0; i < size; i++)
2356 vec2[i] = const1 * vec1[i];
2357 }
2358
2359 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2360
2361 static void
2362 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2363 int size)
2364 {
2365 lambda_vector_mult_const (vec1, vec2, size, -1);
2366 }
2367
2368 /* Negate row R1 of matrix MAT which has N columns. */
2369
2370 static void
2371 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2372 {
2373 lambda_vector_negate (mat[r1], mat[r1], n);
2374 }
2375
2376 /* Return true if two vectors are equal. */
2377
2378 static bool
2379 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2380 {
2381 int i;
2382 for (i = 0; i < size; i++)
2383 if (vec1[i] != vec2[i])
2384 return false;
2385 return true;
2386 }
2387
2388 /* Given an M x N integer matrix A, this function determines an M x
2389 M unimodular matrix U, and an M x N echelon matrix S such that
2390 "U.A = S". This decomposition is also known as "right Hermite".
2391
2392 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2393 Restructuring Compilers" Utpal Banerjee. */
2394
2395 static void
2396 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2397 lambda_matrix S, lambda_matrix U)
2398 {
2399 int i, j, i0 = 0;
2400
2401 lambda_matrix_copy (A, S, m, n);
2402 lambda_matrix_id (U, m);
2403
2404 for (j = 0; j < n; j++)
2405 {
2406 if (lambda_vector_first_nz (S[j], m, i0) < m)
2407 {
2408 ++i0;
2409 for (i = m - 1; i >= i0; i--)
2410 {
2411 while (S[i][j] != 0)
2412 {
2413 int sigma, factor, a, b;
2414
2415 a = S[i-1][j];
2416 b = S[i][j];
2417 sigma = (a * b < 0) ? -1: 1;
2418 a = abs (a);
2419 b = abs (b);
2420 factor = sigma * (a / b);
2421
2422 lambda_matrix_row_add (S, n, i, i-1, -factor);
2423 lambda_matrix_row_exchange (S, i, i-1);
2424
2425 lambda_matrix_row_add (U, m, i, i-1, -factor);
2426 lambda_matrix_row_exchange (U, i, i-1);
2427 }
2428 }
2429 }
2430 }
2431 }
2432
2433 /* Determines the overlapping elements due to accesses CHREC_A and
2434 CHREC_B, that are affine functions. This function cannot handle
2435 symbolic evolution functions, ie. when initial conditions are
2436 parameters, because it uses lambda matrices of integers. */
2437
2438 static void
2439 analyze_subscript_affine_affine (tree chrec_a,
2440 tree chrec_b,
2441 conflict_function **overlaps_a,
2442 conflict_function **overlaps_b,
2443 tree *last_conflicts)
2444 {
2445 unsigned nb_vars_a, nb_vars_b, dim;
2446 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2447 lambda_matrix A, U, S;
2448 struct obstack scratch_obstack;
2449
2450 if (eq_evolutions_p (chrec_a, chrec_b))
2451 {
2452 /* The accessed index overlaps for each iteration in the
2453 loop. */
2454 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2455 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2456 *last_conflicts = chrec_dont_know;
2457 return;
2458 }
2459 if (dump_file && (dump_flags & TDF_DETAILS))
2460 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2461
2462 /* For determining the initial intersection, we have to solve a
2463 Diophantine equation. This is the most time consuming part.
2464
2465 For answering to the question: "Is there a dependence?" we have
2466 to prove that there exists a solution to the Diophantine
2467 equation, and that the solution is in the iteration domain,
2468 i.e. the solution is positive or zero, and that the solution
2469 happens before the upper bound loop.nb_iterations. Otherwise
2470 there is no dependence. This function outputs a description of
2471 the iterations that hold the intersections. */
2472
2473 nb_vars_a = nb_vars_in_chrec (chrec_a);
2474 nb_vars_b = nb_vars_in_chrec (chrec_b);
2475
2476 gcc_obstack_init (&scratch_obstack);
2477
2478 dim = nb_vars_a + nb_vars_b;
2479 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2480 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2481 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2482
2483 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2484 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2485 gamma = init_b - init_a;
2486
2487 /* Don't do all the hard work of solving the Diophantine equation
2488 when we already know the solution: for example,
2489 | {3, +, 1}_1
2490 | {3, +, 4}_2
2491 | gamma = 3 - 3 = 0.
2492 Then the first overlap occurs during the first iterations:
2493 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2494 */
2495 if (gamma == 0)
2496 {
2497 if (nb_vars_a == 1 && nb_vars_b == 1)
2498 {
2499 HOST_WIDE_INT step_a, step_b;
2500 HOST_WIDE_INT niter, niter_a, niter_b;
2501 affine_fn ova, ovb;
2502
2503 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2504 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2505 niter = MIN (niter_a, niter_b);
2506 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2507 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2508
2509 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2510 &ova, &ovb,
2511 last_conflicts, 1);
2512 *overlaps_a = conflict_fn (1, ova);
2513 *overlaps_b = conflict_fn (1, ovb);
2514 }
2515
2516 else if (nb_vars_a == 2 && nb_vars_b == 1)
2517 compute_overlap_steps_for_affine_1_2
2518 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2519
2520 else if (nb_vars_a == 1 && nb_vars_b == 2)
2521 compute_overlap_steps_for_affine_1_2
2522 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2523
2524 else
2525 {
2526 if (dump_file && (dump_flags & TDF_DETAILS))
2527 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2528 *overlaps_a = conflict_fn_not_known ();
2529 *overlaps_b = conflict_fn_not_known ();
2530 *last_conflicts = chrec_dont_know;
2531 }
2532 goto end_analyze_subs_aa;
2533 }
2534
2535 /* U.A = S */
2536 lambda_matrix_right_hermite (A, dim, 1, S, U);
2537
2538 if (S[0][0] < 0)
2539 {
2540 S[0][0] *= -1;
2541 lambda_matrix_row_negate (U, dim, 0);
2542 }
2543 gcd_alpha_beta = S[0][0];
2544
2545 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2546 but that is a quite strange case. Instead of ICEing, answer
2547 don't know. */
2548 if (gcd_alpha_beta == 0)
2549 {
2550 *overlaps_a = conflict_fn_not_known ();
2551 *overlaps_b = conflict_fn_not_known ();
2552 *last_conflicts = chrec_dont_know;
2553 goto end_analyze_subs_aa;
2554 }
2555
2556 /* The classic "gcd-test". */
2557 if (!int_divides_p (gcd_alpha_beta, gamma))
2558 {
2559 /* The "gcd-test" has determined that there is no integer
2560 solution, i.e. there is no dependence. */
2561 *overlaps_a = conflict_fn_no_dependence ();
2562 *overlaps_b = conflict_fn_no_dependence ();
2563 *last_conflicts = integer_zero_node;
2564 }
2565
2566 /* Both access functions are univariate. This includes SIV and MIV cases. */
2567 else if (nb_vars_a == 1 && nb_vars_b == 1)
2568 {
2569 /* Both functions should have the same evolution sign. */
2570 if (((A[0][0] > 0 && -A[1][0] > 0)
2571 || (A[0][0] < 0 && -A[1][0] < 0)))
2572 {
2573 /* The solutions are given by:
2574 |
2575 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2576 | [u21 u22] [y0]
2577
2578 For a given integer t. Using the following variables,
2579
2580 | i0 = u11 * gamma / gcd_alpha_beta
2581 | j0 = u12 * gamma / gcd_alpha_beta
2582 | i1 = u21
2583 | j1 = u22
2584
2585 the solutions are:
2586
2587 | x0 = i0 + i1 * t,
2588 | y0 = j0 + j1 * t. */
2589 HOST_WIDE_INT i0, j0, i1, j1;
2590
2591 i0 = U[0][0] * gamma / gcd_alpha_beta;
2592 j0 = U[0][1] * gamma / gcd_alpha_beta;
2593 i1 = U[1][0];
2594 j1 = U[1][1];
2595
2596 if ((i1 == 0 && i0 < 0)
2597 || (j1 == 0 && j0 < 0))
2598 {
2599 /* There is no solution.
2600 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2601 falls in here, but for the moment we don't look at the
2602 upper bound of the iteration domain. */
2603 *overlaps_a = conflict_fn_no_dependence ();
2604 *overlaps_b = conflict_fn_no_dependence ();
2605 *last_conflicts = integer_zero_node;
2606 goto end_analyze_subs_aa;
2607 }
2608
2609 if (i1 > 0 && j1 > 0)
2610 {
2611 HOST_WIDE_INT niter_a
2612 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2613 HOST_WIDE_INT niter_b
2614 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2615 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2616
2617 /* (X0, Y0) is a solution of the Diophantine equation:
2618 "chrec_a (X0) = chrec_b (Y0)". */
2619 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2620 CEIL (-j0, j1));
2621 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2622 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2623
2624 /* (X1, Y1) is the smallest positive solution of the eq
2625 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2626 first conflict occurs. */
2627 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2628 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2629 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2630
2631 if (niter > 0)
2632 {
2633 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2634 FLOOR_DIV (niter - j0, j1));
2635 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2636
2637 /* If the overlap occurs outside of the bounds of the
2638 loop, there is no dependence. */
2639 if (x1 >= niter || y1 >= niter)
2640 {
2641 *overlaps_a = conflict_fn_no_dependence ();
2642 *overlaps_b = conflict_fn_no_dependence ();
2643 *last_conflicts = integer_zero_node;
2644 goto end_analyze_subs_aa;
2645 }
2646 else
2647 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2648 }
2649 else
2650 *last_conflicts = chrec_dont_know;
2651
2652 *overlaps_a
2653 = conflict_fn (1,
2654 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2655 1,
2656 build_int_cst (NULL_TREE, i1)));
2657 *overlaps_b
2658 = conflict_fn (1,
2659 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2660 1,
2661 build_int_cst (NULL_TREE, j1)));
2662 }
2663 else
2664 {
2665 /* FIXME: For the moment, the upper bound of the
2666 iteration domain for i and j is not checked. */
2667 if (dump_file && (dump_flags & TDF_DETAILS))
2668 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2669 *overlaps_a = conflict_fn_not_known ();
2670 *overlaps_b = conflict_fn_not_known ();
2671 *last_conflicts = chrec_dont_know;
2672 }
2673 }
2674 else
2675 {
2676 if (dump_file && (dump_flags & TDF_DETAILS))
2677 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2678 *overlaps_a = conflict_fn_not_known ();
2679 *overlaps_b = conflict_fn_not_known ();
2680 *last_conflicts = chrec_dont_know;
2681 }
2682 }
2683 else
2684 {
2685 if (dump_file && (dump_flags & TDF_DETAILS))
2686 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2687 *overlaps_a = conflict_fn_not_known ();
2688 *overlaps_b = conflict_fn_not_known ();
2689 *last_conflicts = chrec_dont_know;
2690 }
2691
2692 end_analyze_subs_aa:
2693 obstack_free (&scratch_obstack, NULL);
2694 if (dump_file && (dump_flags & TDF_DETAILS))
2695 {
2696 fprintf (dump_file, " (overlaps_a = ");
2697 dump_conflict_function (dump_file, *overlaps_a);
2698 fprintf (dump_file, ")\n (overlaps_b = ");
2699 dump_conflict_function (dump_file, *overlaps_b);
2700 fprintf (dump_file, "))\n");
2701 }
2702 }
2703
2704 /* Returns true when analyze_subscript_affine_affine can be used for
2705 determining the dependence relation between chrec_a and chrec_b,
2706 that contain symbols. This function modifies chrec_a and chrec_b
2707 such that the analysis result is the same, and such that they don't
2708 contain symbols, and then can safely be passed to the analyzer.
2709
2710 Example: The analysis of the following tuples of evolutions produce
2711 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2712 vs. {0, +, 1}_1
2713
2714 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2715 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2716 */
2717
2718 static bool
2719 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2720 {
2721 tree diff, type, left_a, left_b, right_b;
2722
2723 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2724 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2725 /* FIXME: For the moment not handled. Might be refined later. */
2726 return false;
2727
2728 type = chrec_type (*chrec_a);
2729 left_a = CHREC_LEFT (*chrec_a);
2730 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2731 diff = chrec_fold_minus (type, left_a, left_b);
2732
2733 if (!evolution_function_is_constant_p (diff))
2734 return false;
2735
2736 if (dump_file && (dump_flags & TDF_DETAILS))
2737 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2738
2739 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2740 diff, CHREC_RIGHT (*chrec_a));
2741 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2742 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2743 build_int_cst (type, 0),
2744 right_b);
2745 return true;
2746 }
2747
2748 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2749 *OVERLAPS_B are initialized to the functions that describe the
2750 relation between the elements accessed twice by CHREC_A and
2751 CHREC_B. For k >= 0, the following property is verified:
2752
2753 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2754
2755 static void
2756 analyze_siv_subscript (tree chrec_a,
2757 tree chrec_b,
2758 conflict_function **overlaps_a,
2759 conflict_function **overlaps_b,
2760 tree *last_conflicts,
2761 int loop_nest_num)
2762 {
2763 dependence_stats.num_siv++;
2764
2765 if (dump_file && (dump_flags & TDF_DETAILS))
2766 fprintf (dump_file, "(analyze_siv_subscript \n");
2767
2768 if (evolution_function_is_constant_p (chrec_a)
2769 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2770 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2771 overlaps_a, overlaps_b, last_conflicts);
2772
2773 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2774 && evolution_function_is_constant_p (chrec_b))
2775 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2776 overlaps_b, overlaps_a, last_conflicts);
2777
2778 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2779 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2780 {
2781 if (!chrec_contains_symbols (chrec_a)
2782 && !chrec_contains_symbols (chrec_b))
2783 {
2784 analyze_subscript_affine_affine (chrec_a, chrec_b,
2785 overlaps_a, overlaps_b,
2786 last_conflicts);
2787
2788 if (CF_NOT_KNOWN_P (*overlaps_a)
2789 || CF_NOT_KNOWN_P (*overlaps_b))
2790 dependence_stats.num_siv_unimplemented++;
2791 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2792 || CF_NO_DEPENDENCE_P (*overlaps_b))
2793 dependence_stats.num_siv_independent++;
2794 else
2795 dependence_stats.num_siv_dependent++;
2796 }
2797 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2798 &chrec_b))
2799 {
2800 analyze_subscript_affine_affine (chrec_a, chrec_b,
2801 overlaps_a, overlaps_b,
2802 last_conflicts);
2803
2804 if (CF_NOT_KNOWN_P (*overlaps_a)
2805 || CF_NOT_KNOWN_P (*overlaps_b))
2806 dependence_stats.num_siv_unimplemented++;
2807 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2808 || CF_NO_DEPENDENCE_P (*overlaps_b))
2809 dependence_stats.num_siv_independent++;
2810 else
2811 dependence_stats.num_siv_dependent++;
2812 }
2813 else
2814 goto siv_subscript_dontknow;
2815 }
2816
2817 else
2818 {
2819 siv_subscript_dontknow:;
2820 if (dump_file && (dump_flags & TDF_DETAILS))
2821 fprintf (dump_file, " siv test failed: unimplemented");
2822 *overlaps_a = conflict_fn_not_known ();
2823 *overlaps_b = conflict_fn_not_known ();
2824 *last_conflicts = chrec_dont_know;
2825 dependence_stats.num_siv_unimplemented++;
2826 }
2827
2828 if (dump_file && (dump_flags & TDF_DETAILS))
2829 fprintf (dump_file, ")\n");
2830 }
2831
2832 /* Returns false if we can prove that the greatest common divisor of the steps
2833 of CHREC does not divide CST, false otherwise. */
2834
2835 static bool
2836 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2837 {
2838 HOST_WIDE_INT cd = 0, val;
2839 tree step;
2840
2841 if (!tree_fits_shwi_p (cst))
2842 return true;
2843 val = tree_to_shwi (cst);
2844
2845 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2846 {
2847 step = CHREC_RIGHT (chrec);
2848 if (!tree_fits_shwi_p (step))
2849 return true;
2850 cd = gcd (cd, tree_to_shwi (step));
2851 chrec = CHREC_LEFT (chrec);
2852 }
2853
2854 return val % cd == 0;
2855 }
2856
2857 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2858 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2859 functions that describe the relation between the elements accessed
2860 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2861 is verified:
2862
2863 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2864
2865 static void
2866 analyze_miv_subscript (tree chrec_a,
2867 tree chrec_b,
2868 conflict_function **overlaps_a,
2869 conflict_function **overlaps_b,
2870 tree *last_conflicts,
2871 struct loop *loop_nest)
2872 {
2873 tree type, difference;
2874
2875 dependence_stats.num_miv++;
2876 if (dump_file && (dump_flags & TDF_DETAILS))
2877 fprintf (dump_file, "(analyze_miv_subscript \n");
2878
2879 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2880 chrec_a = chrec_convert (type, chrec_a, NULL);
2881 chrec_b = chrec_convert (type, chrec_b, NULL);
2882 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2883
2884 if (eq_evolutions_p (chrec_a, chrec_b))
2885 {
2886 /* Access functions are the same: all the elements are accessed
2887 in the same order. */
2888 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2889 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2890 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2891 dependence_stats.num_miv_dependent++;
2892 }
2893
2894 else if (evolution_function_is_constant_p (difference)
2895 /* For the moment, the following is verified:
2896 evolution_function_is_affine_multivariate_p (chrec_a,
2897 loop_nest->num) */
2898 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2899 {
2900 /* testsuite/.../ssa-chrec-33.c
2901 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2902
2903 The difference is 1, and all the evolution steps are multiples
2904 of 2, consequently there are no overlapping elements. */
2905 *overlaps_a = conflict_fn_no_dependence ();
2906 *overlaps_b = conflict_fn_no_dependence ();
2907 *last_conflicts = integer_zero_node;
2908 dependence_stats.num_miv_independent++;
2909 }
2910
2911 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2912 && !chrec_contains_symbols (chrec_a)
2913 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2914 && !chrec_contains_symbols (chrec_b))
2915 {
2916 /* testsuite/.../ssa-chrec-35.c
2917 {0, +, 1}_2 vs. {0, +, 1}_3
2918 the overlapping elements are respectively located at iterations:
2919 {0, +, 1}_x and {0, +, 1}_x,
2920 in other words, we have the equality:
2921 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2922
2923 Other examples:
2924 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2925 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2926
2927 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2928 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2929 */
2930 analyze_subscript_affine_affine (chrec_a, chrec_b,
2931 overlaps_a, overlaps_b, last_conflicts);
2932
2933 if (CF_NOT_KNOWN_P (*overlaps_a)
2934 || CF_NOT_KNOWN_P (*overlaps_b))
2935 dependence_stats.num_miv_unimplemented++;
2936 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2937 || CF_NO_DEPENDENCE_P (*overlaps_b))
2938 dependence_stats.num_miv_independent++;
2939 else
2940 dependence_stats.num_miv_dependent++;
2941 }
2942
2943 else
2944 {
2945 /* When the analysis is too difficult, answer "don't know". */
2946 if (dump_file && (dump_flags & TDF_DETAILS))
2947 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2948
2949 *overlaps_a = conflict_fn_not_known ();
2950 *overlaps_b = conflict_fn_not_known ();
2951 *last_conflicts = chrec_dont_know;
2952 dependence_stats.num_miv_unimplemented++;
2953 }
2954
2955 if (dump_file && (dump_flags & TDF_DETAILS))
2956 fprintf (dump_file, ")\n");
2957 }
2958
2959 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2960 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2961 OVERLAP_ITERATIONS_B are initialized with two functions that
2962 describe the iterations that contain conflicting elements.
2963
2964 Remark: For an integer k >= 0, the following equality is true:
2965
2966 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2967 */
2968
2969 static void
2970 analyze_overlapping_iterations (tree chrec_a,
2971 tree chrec_b,
2972 conflict_function **overlap_iterations_a,
2973 conflict_function **overlap_iterations_b,
2974 tree *last_conflicts, struct loop *loop_nest)
2975 {
2976 unsigned int lnn = loop_nest->num;
2977
2978 dependence_stats.num_subscript_tests++;
2979
2980 if (dump_file && (dump_flags & TDF_DETAILS))
2981 {
2982 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2983 fprintf (dump_file, " (chrec_a = ");
2984 print_generic_expr (dump_file, chrec_a, 0);
2985 fprintf (dump_file, ")\n (chrec_b = ");
2986 print_generic_expr (dump_file, chrec_b, 0);
2987 fprintf (dump_file, ")\n");
2988 }
2989
2990 if (chrec_a == NULL_TREE
2991 || chrec_b == NULL_TREE
2992 || chrec_contains_undetermined (chrec_a)
2993 || chrec_contains_undetermined (chrec_b))
2994 {
2995 dependence_stats.num_subscript_undetermined++;
2996
2997 *overlap_iterations_a = conflict_fn_not_known ();
2998 *overlap_iterations_b = conflict_fn_not_known ();
2999 }
3000
3001 /* If they are the same chrec, and are affine, they overlap
3002 on every iteration. */
3003 else if (eq_evolutions_p (chrec_a, chrec_b)
3004 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3005 || operand_equal_p (chrec_a, chrec_b, 0)))
3006 {
3007 dependence_stats.num_same_subscript_function++;
3008 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3009 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3010 *last_conflicts = chrec_dont_know;
3011 }
3012
3013 /* If they aren't the same, and aren't affine, we can't do anything
3014 yet. */
3015 else if ((chrec_contains_symbols (chrec_a)
3016 || chrec_contains_symbols (chrec_b))
3017 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3018 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
3019 {
3020 dependence_stats.num_subscript_undetermined++;
3021 *overlap_iterations_a = conflict_fn_not_known ();
3022 *overlap_iterations_b = conflict_fn_not_known ();
3023 }
3024
3025 else if (ziv_subscript_p (chrec_a, chrec_b))
3026 analyze_ziv_subscript (chrec_a, chrec_b,
3027 overlap_iterations_a, overlap_iterations_b,
3028 last_conflicts);
3029
3030 else if (siv_subscript_p (chrec_a, chrec_b))
3031 analyze_siv_subscript (chrec_a, chrec_b,
3032 overlap_iterations_a, overlap_iterations_b,
3033 last_conflicts, lnn);
3034
3035 else
3036 analyze_miv_subscript (chrec_a, chrec_b,
3037 overlap_iterations_a, overlap_iterations_b,
3038 last_conflicts, loop_nest);
3039
3040 if (dump_file && (dump_flags & TDF_DETAILS))
3041 {
3042 fprintf (dump_file, " (overlap_iterations_a = ");
3043 dump_conflict_function (dump_file, *overlap_iterations_a);
3044 fprintf (dump_file, ")\n (overlap_iterations_b = ");
3045 dump_conflict_function (dump_file, *overlap_iterations_b);
3046 fprintf (dump_file, "))\n");
3047 }
3048 }
3049
3050 /* Helper function for uniquely inserting distance vectors. */
3051
3052 static void
3053 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3054 {
3055 unsigned i;
3056 lambda_vector v;
3057
3058 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3059 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3060 return;
3061
3062 DDR_DIST_VECTS (ddr).safe_push (dist_v);
3063 }
3064
3065 /* Helper function for uniquely inserting direction vectors. */
3066
3067 static void
3068 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3069 {
3070 unsigned i;
3071 lambda_vector v;
3072
3073 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3074 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3075 return;
3076
3077 DDR_DIR_VECTS (ddr).safe_push (dir_v);
3078 }
3079
3080 /* Add a distance of 1 on all the loops outer than INDEX. If we
3081 haven't yet determined a distance for this outer loop, push a new
3082 distance vector composed of the previous distance, and a distance
3083 of 1 for this outer loop. Example:
3084
3085 | loop_1
3086 | loop_2
3087 | A[10]
3088 | endloop_2
3089 | endloop_1
3090
3091 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3092 save (0, 1), then we have to save (1, 0). */
3093
3094 static void
3095 add_outer_distances (struct data_dependence_relation *ddr,
3096 lambda_vector dist_v, int index)
3097 {
3098 /* For each outer loop where init_v is not set, the accesses are
3099 in dependence of distance 1 in the loop. */
3100 while (--index >= 0)
3101 {
3102 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3103 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3104 save_v[index] = 1;
3105 save_dist_v (ddr, save_v);
3106 }
3107 }
3108
3109 /* Return false when fail to represent the data dependence as a
3110 distance vector. INIT_B is set to true when a component has been
3111 added to the distance vector DIST_V. INDEX_CARRY is then set to
3112 the index in DIST_V that carries the dependence. */
3113
3114 static bool
3115 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3116 struct data_reference *ddr_a,
3117 struct data_reference *ddr_b,
3118 lambda_vector dist_v, bool *init_b,
3119 int *index_carry)
3120 {
3121 unsigned i;
3122 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3123
3124 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3125 {
3126 tree access_fn_a, access_fn_b;
3127 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3128
3129 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3130 {
3131 non_affine_dependence_relation (ddr);
3132 return false;
3133 }
3134
3135 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3136 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3137
3138 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3139 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3140 {
3141 int dist, index;
3142 int var_a = CHREC_VARIABLE (access_fn_a);
3143 int var_b = CHREC_VARIABLE (access_fn_b);
3144
3145 if (var_a != var_b
3146 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3147 {
3148 non_affine_dependence_relation (ddr);
3149 return false;
3150 }
3151
3152 dist = int_cst_value (SUB_DISTANCE (subscript));
3153 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3154 *index_carry = MIN (index, *index_carry);
3155
3156 /* This is the subscript coupling test. If we have already
3157 recorded a distance for this loop (a distance coming from
3158 another subscript), it should be the same. For example,
3159 in the following code, there is no dependence:
3160
3161 | loop i = 0, N, 1
3162 | T[i+1][i] = ...
3163 | ... = T[i][i]
3164 | endloop
3165 */
3166 if (init_v[index] != 0 && dist_v[index] != dist)
3167 {
3168 finalize_ddr_dependent (ddr, chrec_known);
3169 return false;
3170 }
3171
3172 dist_v[index] = dist;
3173 init_v[index] = 1;
3174 *init_b = true;
3175 }
3176 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3177 {
3178 /* This can be for example an affine vs. constant dependence
3179 (T[i] vs. T[3]) that is not an affine dependence and is
3180 not representable as a distance vector. */
3181 non_affine_dependence_relation (ddr);
3182 return false;
3183 }
3184 }
3185
3186 return true;
3187 }
3188
3189 /* Return true when the DDR contains only constant access functions. */
3190
3191 static bool
3192 constant_access_functions (const struct data_dependence_relation *ddr)
3193 {
3194 unsigned i;
3195
3196 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3197 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3198 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3199 return false;
3200
3201 return true;
3202 }
3203
3204 /* Helper function for the case where DDR_A and DDR_B are the same
3205 multivariate access function with a constant step. For an example
3206 see pr34635-1.c. */
3207
3208 static void
3209 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3210 {
3211 int x_1, x_2;
3212 tree c_1 = CHREC_LEFT (c_2);
3213 tree c_0 = CHREC_LEFT (c_1);
3214 lambda_vector dist_v;
3215 int v1, v2, cd;
3216
3217 /* Polynomials with more than 2 variables are not handled yet. When
3218 the evolution steps are parameters, it is not possible to
3219 represent the dependence using classical distance vectors. */
3220 if (TREE_CODE (c_0) != INTEGER_CST
3221 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3222 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3223 {
3224 DDR_AFFINE_P (ddr) = false;
3225 return;
3226 }
3227
3228 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3229 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3230
3231 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3232 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3233 v1 = int_cst_value (CHREC_RIGHT (c_1));
3234 v2 = int_cst_value (CHREC_RIGHT (c_2));
3235 cd = gcd (v1, v2);
3236 v1 /= cd;
3237 v2 /= cd;
3238
3239 if (v2 < 0)
3240 {
3241 v2 = -v2;
3242 v1 = -v1;
3243 }
3244
3245 dist_v[x_1] = v2;
3246 dist_v[x_2] = -v1;
3247 save_dist_v (ddr, dist_v);
3248
3249 add_outer_distances (ddr, dist_v, x_1);
3250 }
3251
3252 /* Helper function for the case where DDR_A and DDR_B are the same
3253 access functions. */
3254
3255 static void
3256 add_other_self_distances (struct data_dependence_relation *ddr)
3257 {
3258 lambda_vector dist_v;
3259 unsigned i;
3260 int index_carry = DDR_NB_LOOPS (ddr);
3261
3262 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3263 {
3264 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3265
3266 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3267 {
3268 if (!evolution_function_is_univariate_p (access_fun))
3269 {
3270 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3271 {
3272 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3273 return;
3274 }
3275
3276 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3277
3278 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3279 add_multivariate_self_dist (ddr, access_fun);
3280 else
3281 /* The evolution step is not constant: it varies in
3282 the outer loop, so this cannot be represented by a
3283 distance vector. For example in pr34635.c the
3284 evolution is {0, +, {0, +, 4}_1}_2. */
3285 DDR_AFFINE_P (ddr) = false;
3286
3287 return;
3288 }
3289
3290 index_carry = MIN (index_carry,
3291 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3292 DDR_LOOP_NEST (ddr)));
3293 }
3294 }
3295
3296 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3297 add_outer_distances (ddr, dist_v, index_carry);
3298 }
3299
3300 static void
3301 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3302 {
3303 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3304
3305 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3306 save_dist_v (ddr, dist_v);
3307 }
3308
3309 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3310 is the case for example when access functions are the same and
3311 equal to a constant, as in:
3312
3313 | loop_1
3314 | A[3] = ...
3315 | ... = A[3]
3316 | endloop_1
3317
3318 in which case the distance vectors are (0) and (1). */
3319
3320 static void
3321 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3322 {
3323 unsigned i, j;
3324
3325 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3326 {
3327 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3328 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3329 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3330
3331 for (j = 0; j < ca->n; j++)
3332 if (affine_function_zero_p (ca->fns[j]))
3333 {
3334 insert_innermost_unit_dist_vector (ddr);
3335 return;
3336 }
3337
3338 for (j = 0; j < cb->n; j++)
3339 if (affine_function_zero_p (cb->fns[j]))
3340 {
3341 insert_innermost_unit_dist_vector (ddr);
3342 return;
3343 }
3344 }
3345 }
3346
3347 /* Compute the classic per loop distance vector. DDR is the data
3348 dependence relation to build a vector from. Return false when fail
3349 to represent the data dependence as a distance vector. */
3350
3351 static bool
3352 build_classic_dist_vector (struct data_dependence_relation *ddr,
3353 struct loop *loop_nest)
3354 {
3355 bool init_b = false;
3356 int index_carry = DDR_NB_LOOPS (ddr);
3357 lambda_vector dist_v;
3358
3359 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3360 return false;
3361
3362 if (same_access_functions (ddr))
3363 {
3364 /* Save the 0 vector. */
3365 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3366 save_dist_v (ddr, dist_v);
3367
3368 if (constant_access_functions (ddr))
3369 add_distance_for_zero_overlaps (ddr);
3370
3371 if (DDR_NB_LOOPS (ddr) > 1)
3372 add_other_self_distances (ddr);
3373
3374 return true;
3375 }
3376
3377 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3378 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3379 dist_v, &init_b, &index_carry))
3380 return false;
3381
3382 /* Save the distance vector if we initialized one. */
3383 if (init_b)
3384 {
3385 /* Verify a basic constraint: classic distance vectors should
3386 always be lexicographically positive.
3387
3388 Data references are collected in the order of execution of
3389 the program, thus for the following loop
3390
3391 | for (i = 1; i < 100; i++)
3392 | for (j = 1; j < 100; j++)
3393 | {
3394 | t = T[j+1][i-1]; // A
3395 | T[j][i] = t + 2; // B
3396 | }
3397
3398 references are collected following the direction of the wind:
3399 A then B. The data dependence tests are performed also
3400 following this order, such that we're looking at the distance
3401 separating the elements accessed by A from the elements later
3402 accessed by B. But in this example, the distance returned by
3403 test_dep (A, B) is lexicographically negative (-1, 1), that
3404 means that the access A occurs later than B with respect to
3405 the outer loop, ie. we're actually looking upwind. In this
3406 case we solve test_dep (B, A) looking downwind to the
3407 lexicographically positive solution, that returns the
3408 distance vector (1, -1). */
3409 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3410 {
3411 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3412 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3413 loop_nest))
3414 return false;
3415 compute_subscript_distance (ddr);
3416 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3417 save_v, &init_b, &index_carry))
3418 return false;
3419 save_dist_v (ddr, save_v);
3420 DDR_REVERSED_P (ddr) = true;
3421
3422 /* In this case there is a dependence forward for all the
3423 outer loops:
3424
3425 | for (k = 1; k < 100; k++)
3426 | for (i = 1; i < 100; i++)
3427 | for (j = 1; j < 100; j++)
3428 | {
3429 | t = T[j+1][i-1]; // A
3430 | T[j][i] = t + 2; // B
3431 | }
3432
3433 the vectors are:
3434 (0, 1, -1)
3435 (1, 1, -1)
3436 (1, -1, 1)
3437 */
3438 if (DDR_NB_LOOPS (ddr) > 1)
3439 {
3440 add_outer_distances (ddr, save_v, index_carry);
3441 add_outer_distances (ddr, dist_v, index_carry);
3442 }
3443 }
3444 else
3445 {
3446 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3447 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3448
3449 if (DDR_NB_LOOPS (ddr) > 1)
3450 {
3451 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3452
3453 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3454 DDR_A (ddr), loop_nest))
3455 return false;
3456 compute_subscript_distance (ddr);
3457 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3458 opposite_v, &init_b,
3459 &index_carry))
3460 return false;
3461
3462 save_dist_v (ddr, save_v);
3463 add_outer_distances (ddr, dist_v, index_carry);
3464 add_outer_distances (ddr, opposite_v, index_carry);
3465 }
3466 else
3467 save_dist_v (ddr, save_v);
3468 }
3469 }
3470 else
3471 {
3472 /* There is a distance of 1 on all the outer loops: Example:
3473 there is a dependence of distance 1 on loop_1 for the array A.
3474
3475 | loop_1
3476 | A[5] = ...
3477 | endloop
3478 */
3479 add_outer_distances (ddr, dist_v,
3480 lambda_vector_first_nz (dist_v,
3481 DDR_NB_LOOPS (ddr), 0));
3482 }
3483
3484 if (dump_file && (dump_flags & TDF_DETAILS))
3485 {
3486 unsigned i;
3487
3488 fprintf (dump_file, "(build_classic_dist_vector\n");
3489 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3490 {
3491 fprintf (dump_file, " dist_vector = (");
3492 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3493 DDR_NB_LOOPS (ddr));
3494 fprintf (dump_file, " )\n");
3495 }
3496 fprintf (dump_file, ")\n");
3497 }
3498
3499 return true;
3500 }
3501
3502 /* Return the direction for a given distance.
3503 FIXME: Computing dir this way is suboptimal, since dir can catch
3504 cases that dist is unable to represent. */
3505
3506 static inline enum data_dependence_direction
3507 dir_from_dist (int dist)
3508 {
3509 if (dist > 0)
3510 return dir_positive;
3511 else if (dist < 0)
3512 return dir_negative;
3513 else
3514 return dir_equal;
3515 }
3516
3517 /* Compute the classic per loop direction vector. DDR is the data
3518 dependence relation to build a vector from. */
3519
3520 static void
3521 build_classic_dir_vector (struct data_dependence_relation *ddr)
3522 {
3523 unsigned i, j;
3524 lambda_vector dist_v;
3525
3526 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3527 {
3528 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3529
3530 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3531 dir_v[j] = dir_from_dist (dist_v[j]);
3532
3533 save_dir_v (ddr, dir_v);
3534 }
3535 }
3536
3537 /* Helper function. Returns true when there is a dependence between
3538 data references DRA and DRB. */
3539
3540 static bool
3541 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3542 struct data_reference *dra,
3543 struct data_reference *drb,
3544 struct loop *loop_nest)
3545 {
3546 unsigned int i;
3547 tree last_conflicts;
3548 struct subscript *subscript;
3549 tree res = NULL_TREE;
3550
3551 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
3552 {
3553 conflict_function *overlaps_a, *overlaps_b;
3554
3555 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3556 DR_ACCESS_FN (drb, i),
3557 &overlaps_a, &overlaps_b,
3558 &last_conflicts, loop_nest);
3559
3560 if (SUB_CONFLICTS_IN_A (subscript))
3561 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3562 if (SUB_CONFLICTS_IN_B (subscript))
3563 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3564
3565 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3566 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3567 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3568
3569 /* If there is any undetermined conflict function we have to
3570 give a conservative answer in case we cannot prove that
3571 no dependence exists when analyzing another subscript. */
3572 if (CF_NOT_KNOWN_P (overlaps_a)
3573 || CF_NOT_KNOWN_P (overlaps_b))
3574 {
3575 res = chrec_dont_know;
3576 continue;
3577 }
3578
3579 /* When there is a subscript with no dependence we can stop. */
3580 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3581 || CF_NO_DEPENDENCE_P (overlaps_b))
3582 {
3583 res = chrec_known;
3584 break;
3585 }
3586 }
3587
3588 if (res == NULL_TREE)
3589 return true;
3590
3591 if (res == chrec_known)
3592 dependence_stats.num_dependence_independent++;
3593 else
3594 dependence_stats.num_dependence_undetermined++;
3595 finalize_ddr_dependent (ddr, res);
3596 return false;
3597 }
3598
3599 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3600
3601 static void
3602 subscript_dependence_tester (struct data_dependence_relation *ddr,
3603 struct loop *loop_nest)
3604 {
3605 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3606 dependence_stats.num_dependence_dependent++;
3607
3608 compute_subscript_distance (ddr);
3609 if (build_classic_dist_vector (ddr, loop_nest))
3610 build_classic_dir_vector (ddr);
3611 }
3612
3613 /* Returns true when all the access functions of A are affine or
3614 constant with respect to LOOP_NEST. */
3615
3616 static bool
3617 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3618 const struct loop *loop_nest)
3619 {
3620 unsigned int i;
3621 vec<tree> fns = DR_ACCESS_FNS (a);
3622 tree t;
3623
3624 FOR_EACH_VEC_ELT (fns, i, t)
3625 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3626 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3627 return false;
3628
3629 return true;
3630 }
3631
3632 /* Initializes an equation for an OMEGA problem using the information
3633 contained in the ACCESS_FUN. Returns true when the operation
3634 succeeded.
3635
3636 PB is the omega constraint system.
3637 EQ is the number of the equation to be initialized.
3638 OFFSET is used for shifting the variables names in the constraints:
3639 a constrain is composed of 2 * the number of variables surrounding
3640 dependence accesses. OFFSET is set either to 0 for the first n variables,
3641 then it is set to n.
3642 ACCESS_FUN is expected to be an affine chrec. */
3643
3644 static bool
3645 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3646 unsigned int offset, tree access_fun,
3647 struct data_dependence_relation *ddr)
3648 {
3649 switch (TREE_CODE (access_fun))
3650 {
3651 case POLYNOMIAL_CHREC:
3652 {
3653 tree left = CHREC_LEFT (access_fun);
3654 tree right = CHREC_RIGHT (access_fun);
3655 int var = CHREC_VARIABLE (access_fun);
3656 unsigned var_idx;
3657
3658 if (TREE_CODE (right) != INTEGER_CST)
3659 return false;
3660
3661 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3662 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3663
3664 /* Compute the innermost loop index. */
3665 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3666
3667 if (offset == 0)
3668 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3669 += int_cst_value (right);
3670
3671 switch (TREE_CODE (left))
3672 {
3673 case POLYNOMIAL_CHREC:
3674 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3675
3676 case INTEGER_CST:
3677 pb->eqs[eq].coef[0] += int_cst_value (left);
3678 return true;
3679
3680 default:
3681 return false;
3682 }
3683 }
3684
3685 case INTEGER_CST:
3686 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3687 return true;
3688
3689 default:
3690 return false;
3691 }
3692 }
3693
3694 /* As explained in the comments preceding init_omega_for_ddr, we have
3695 to set up a system for each loop level, setting outer loops
3696 variation to zero, and current loop variation to positive or zero.
3697 Save each lexico positive distance vector. */
3698
3699 static void
3700 omega_extract_distance_vectors (omega_pb pb,
3701 struct data_dependence_relation *ddr)
3702 {
3703 int eq, geq;
3704 unsigned i, j;
3705 struct loop *loopi, *loopj;
3706 enum omega_result res;
3707
3708 /* Set a new problem for each loop in the nest. The basis is the
3709 problem that we have initialized until now. On top of this we
3710 add new constraints. */
3711 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3712 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3713 {
3714 int dist = 0;
3715 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3716 DDR_NB_LOOPS (ddr));
3717
3718 omega_copy_problem (copy, pb);
3719
3720 /* For all the outer loops "loop_j", add "dj = 0". */
3721 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3722 {
3723 eq = omega_add_zero_eq (copy, omega_black);
3724 copy->eqs[eq].coef[j + 1] = 1;
3725 }
3726
3727 /* For "loop_i", add "0 <= di". */
3728 geq = omega_add_zero_geq (copy, omega_black);
3729 copy->geqs[geq].coef[i + 1] = 1;
3730
3731 /* Reduce the constraint system, and test that the current
3732 problem is feasible. */
3733 res = omega_simplify_problem (copy);
3734 if (res == omega_false
3735 || res == omega_unknown
3736 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3737 goto next_problem;
3738
3739 for (eq = 0; eq < copy->num_subs; eq++)
3740 if (copy->subs[eq].key == (int) i + 1)
3741 {
3742 dist = copy->subs[eq].coef[0];
3743 goto found_dist;
3744 }
3745
3746 if (dist == 0)
3747 {
3748 /* Reinitialize problem... */
3749 omega_copy_problem (copy, pb);
3750 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3751 {
3752 eq = omega_add_zero_eq (copy, omega_black);
3753 copy->eqs[eq].coef[j + 1] = 1;
3754 }
3755
3756 /* ..., but this time "di = 1". */
3757 eq = omega_add_zero_eq (copy, omega_black);
3758 copy->eqs[eq].coef[i + 1] = 1;
3759 copy->eqs[eq].coef[0] = -1;
3760
3761 res = omega_simplify_problem (copy);
3762 if (res == omega_false
3763 || res == omega_unknown
3764 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3765 goto next_problem;
3766
3767 for (eq = 0; eq < copy->num_subs; eq++)
3768 if (copy->subs[eq].key == (int) i + 1)
3769 {
3770 dist = copy->subs[eq].coef[0];
3771 goto found_dist;
3772 }
3773 }
3774
3775 found_dist:;
3776 /* Save the lexicographically positive distance vector. */
3777 if (dist >= 0)
3778 {
3779 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3780 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3781
3782 dist_v[i] = dist;
3783
3784 for (eq = 0; eq < copy->num_subs; eq++)
3785 if (copy->subs[eq].key > 0)
3786 {
3787 dist = copy->subs[eq].coef[0];
3788 dist_v[copy->subs[eq].key - 1] = dist;
3789 }
3790
3791 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3792 dir_v[j] = dir_from_dist (dist_v[j]);
3793
3794 save_dist_v (ddr, dist_v);
3795 save_dir_v (ddr, dir_v);
3796 }
3797
3798 next_problem:;
3799 omega_free_problem (copy);
3800 }
3801 }
3802
3803 /* This is called for each subscript of a tuple of data references:
3804 insert an equality for representing the conflicts. */
3805
3806 static bool
3807 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3808 struct data_dependence_relation *ddr,
3809 omega_pb pb, bool *maybe_dependent)
3810 {
3811 int eq;
3812 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3813 TREE_TYPE (access_fun_b));
3814 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3815 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3816 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3817 tree minus_one;
3818
3819 /* When the fun_a - fun_b is not constant, the dependence is not
3820 captured by the classic distance vector representation. */
3821 if (TREE_CODE (difference) != INTEGER_CST)
3822 return false;
3823
3824 /* ZIV test. */
3825 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3826 {
3827 /* There is no dependence. */
3828 *maybe_dependent = false;
3829 return true;
3830 }
3831
3832 minus_one = build_int_cst (type, -1);
3833 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3834
3835 eq = omega_add_zero_eq (pb, omega_black);
3836 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3837 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3838 /* There is probably a dependence, but the system of
3839 constraints cannot be built: answer "don't know". */
3840 return false;
3841
3842 /* GCD test. */
3843 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3844 && !int_divides_p (lambda_vector_gcd
3845 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3846 2 * DDR_NB_LOOPS (ddr)),
3847 pb->eqs[eq].coef[0]))
3848 {
3849 /* There is no dependence. */
3850 *maybe_dependent = false;
3851 return true;
3852 }
3853
3854 return true;
3855 }
3856
3857 /* Helper function, same as init_omega_for_ddr but specialized for
3858 data references A and B. */
3859
3860 static bool
3861 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3862 struct data_dependence_relation *ddr,
3863 omega_pb pb, bool *maybe_dependent)
3864 {
3865 unsigned i;
3866 int ineq;
3867 struct loop *loopi;
3868 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3869
3870 /* Insert an equality per subscript. */
3871 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3872 {
3873 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3874 ddr, pb, maybe_dependent))
3875 return false;
3876 else if (*maybe_dependent == false)
3877 {
3878 /* There is no dependence. */
3879 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3880 return true;
3881 }
3882 }
3883
3884 /* Insert inequalities: constraints corresponding to the iteration
3885 domain, i.e. the loops surrounding the references "loop_x" and
3886 the distance variables "dx". The layout of the OMEGA
3887 representation is as follows:
3888 - coef[0] is the constant
3889 - coef[1..nb_loops] are the protected variables that will not be
3890 removed by the solver: the "dx"
3891 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3892 */
3893 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3894 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3895 {
3896 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3897
3898 /* 0 <= loop_x */
3899 ineq = omega_add_zero_geq (pb, omega_black);
3900 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3901
3902 /* 0 <= loop_x + dx */
3903 ineq = omega_add_zero_geq (pb, omega_black);
3904 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3905 pb->geqs[ineq].coef[i + 1] = 1;
3906
3907 if (nbi != -1)
3908 {
3909 /* loop_x <= nb_iters */
3910 ineq = omega_add_zero_geq (pb, omega_black);
3911 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3912 pb->geqs[ineq].coef[0] = nbi;
3913
3914 /* loop_x + dx <= nb_iters */
3915 ineq = omega_add_zero_geq (pb, omega_black);
3916 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3917 pb->geqs[ineq].coef[i + 1] = -1;
3918 pb->geqs[ineq].coef[0] = nbi;
3919
3920 /* A step "dx" bigger than nb_iters is not feasible, so
3921 add "0 <= nb_iters + dx", */
3922 ineq = omega_add_zero_geq (pb, omega_black);
3923 pb->geqs[ineq].coef[i + 1] = 1;
3924 pb->geqs[ineq].coef[0] = nbi;
3925 /* and "dx <= nb_iters". */
3926 ineq = omega_add_zero_geq (pb, omega_black);
3927 pb->geqs[ineq].coef[i + 1] = -1;
3928 pb->geqs[ineq].coef[0] = nbi;
3929 }
3930 }
3931
3932 omega_extract_distance_vectors (pb, ddr);
3933
3934 return true;
3935 }
3936
3937 /* Sets up the Omega dependence problem for the data dependence
3938 relation DDR. Returns false when the constraint system cannot be
3939 built, ie. when the test answers "don't know". Returns true
3940 otherwise, and when independence has been proved (using one of the
3941 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3942 set MAYBE_DEPENDENT to true.
3943
3944 Example: for setting up the dependence system corresponding to the
3945 conflicting accesses
3946
3947 | loop_i
3948 | loop_j
3949 | A[i, i+1] = ...
3950 | ... A[2*j, 2*(i + j)]
3951 | endloop_j
3952 | endloop_i
3953
3954 the following constraints come from the iteration domain:
3955
3956 0 <= i <= Ni
3957 0 <= i + di <= Ni
3958 0 <= j <= Nj
3959 0 <= j + dj <= Nj
3960
3961 where di, dj are the distance variables. The constraints
3962 representing the conflicting elements are:
3963
3964 i = 2 * (j + dj)
3965 i + 1 = 2 * (i + di + j + dj)
3966
3967 For asking that the resulting distance vector (di, dj) be
3968 lexicographically positive, we insert the constraint "di >= 0". If
3969 "di = 0" in the solution, we fix that component to zero, and we
3970 look at the inner loops: we set a new problem where all the outer
3971 loop distances are zero, and fix this inner component to be
3972 positive. When one of the components is positive, we save that
3973 distance, and set a new problem where the distance on this loop is
3974 zero, searching for other distances in the inner loops. Here is
3975 the classic example that illustrates that we have to set for each
3976 inner loop a new problem:
3977
3978 | loop_1
3979 | loop_2
3980 | A[10]
3981 | endloop_2
3982 | endloop_1
3983
3984 we have to save two distances (1, 0) and (0, 1).
3985
3986 Given two array references, refA and refB, we have to set the
3987 dependence problem twice, refA vs. refB and refB vs. refA, and we
3988 cannot do a single test, as refB might occur before refA in the
3989 inner loops, and the contrary when considering outer loops: ex.
3990
3991 | loop_0
3992 | loop_1
3993 | loop_2
3994 | T[{1,+,1}_2][{1,+,1}_1] // refA
3995 | T[{2,+,1}_2][{0,+,1}_1] // refB
3996 | endloop_2
3997 | endloop_1
3998 | endloop_0
3999
4000 refB touches the elements in T before refA, and thus for the same
4001 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
4002 but for successive loop_0 iterations, we have (1, -1, 1)
4003
4004 The Omega solver expects the distance variables ("di" in the
4005 previous example) to come first in the constraint system (as
4006 variables to be protected, or "safe" variables), the constraint
4007 system is built using the following layout:
4008
4009 "cst | distance vars | index vars".
4010 */
4011
4012 static bool
4013 init_omega_for_ddr (struct data_dependence_relation *ddr,
4014 bool *maybe_dependent)
4015 {
4016 omega_pb pb;
4017 bool res = false;
4018
4019 *maybe_dependent = true;
4020
4021 if (same_access_functions (ddr))
4022 {
4023 unsigned j;
4024 lambda_vector dir_v;
4025
4026 /* Save the 0 vector. */
4027 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4028 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4029 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4030 dir_v[j] = dir_equal;
4031 save_dir_v (ddr, dir_v);
4032
4033 /* Save the dependences carried by outer loops. */
4034 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4035 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4036 maybe_dependent);
4037 omega_free_problem (pb);
4038 return res;
4039 }
4040
4041 /* Omega expects the protected variables (those that have to be kept
4042 after elimination) to appear first in the constraint system.
4043 These variables are the distance variables. In the following
4044 initialization we declare NB_LOOPS safe variables, and the total
4045 number of variables for the constraint system is 2*NB_LOOPS. */
4046 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4047 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4048 maybe_dependent);
4049 omega_free_problem (pb);
4050
4051 /* Stop computation if not decidable, or no dependence. */
4052 if (res == false || *maybe_dependent == false)
4053 return res;
4054
4055 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4056 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4057 maybe_dependent);
4058 omega_free_problem (pb);
4059
4060 return res;
4061 }
4062
4063 /* Return true when DDR contains the same information as that stored
4064 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4065
4066 static bool
4067 ddr_consistent_p (FILE *file,
4068 struct data_dependence_relation *ddr,
4069 vec<lambda_vector> dist_vects,
4070 vec<lambda_vector> dir_vects)
4071 {
4072 unsigned int i, j;
4073
4074 /* If dump_file is set, output there. */
4075 if (dump_file && (dump_flags & TDF_DETAILS))
4076 file = dump_file;
4077
4078 if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr))
4079 {
4080 lambda_vector b_dist_v;
4081 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4082 dist_vects.length (),
4083 DDR_NUM_DIST_VECTS (ddr));
4084
4085 fprintf (file, "Banerjee dist vectors:\n");
4086 FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v)
4087 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4088
4089 fprintf (file, "Omega dist vectors:\n");
4090 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4091 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4092
4093 fprintf (file, "data dependence relation:\n");
4094 dump_data_dependence_relation (file, ddr);
4095
4096 fprintf (file, ")\n");
4097 return false;
4098 }
4099
4100 if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr))
4101 {
4102 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4103 dir_vects.length (),
4104 DDR_NUM_DIR_VECTS (ddr));
4105 return false;
4106 }
4107
4108 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4109 {
4110 lambda_vector a_dist_v;
4111 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4112
4113 /* Distance vectors are not ordered in the same way in the DDR
4114 and in the DIST_VECTS: search for a matching vector. */
4115 FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v)
4116 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4117 break;
4118
4119 if (j == dist_vects.length ())
4120 {
4121 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4122 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4123 fprintf (file, "not found in Omega dist vectors:\n");
4124 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4125 fprintf (file, "data dependence relation:\n");
4126 dump_data_dependence_relation (file, ddr);
4127 fprintf (file, ")\n");
4128 }
4129 }
4130
4131 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4132 {
4133 lambda_vector a_dir_v;
4134 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4135
4136 /* Direction vectors are not ordered in the same way in the DDR
4137 and in the DIR_VECTS: search for a matching vector. */
4138 FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v)
4139 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4140 break;
4141
4142 if (j == dist_vects.length ())
4143 {
4144 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4145 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4146 fprintf (file, "not found in Omega dir vectors:\n");
4147 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4148 fprintf (file, "data dependence relation:\n");
4149 dump_data_dependence_relation (file, ddr);
4150 fprintf (file, ")\n");
4151 }
4152 }
4153
4154 return true;
4155 }
4156
4157 /* This computes the affine dependence relation between A and B with
4158 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4159 independence between two accesses, while CHREC_DONT_KNOW is used
4160 for representing the unknown relation.
4161
4162 Note that it is possible to stop the computation of the dependence
4163 relation the first time we detect a CHREC_KNOWN element for a given
4164 subscript. */
4165
4166 void
4167 compute_affine_dependence (struct data_dependence_relation *ddr,
4168 struct loop *loop_nest)
4169 {
4170 struct data_reference *dra = DDR_A (ddr);
4171 struct data_reference *drb = DDR_B (ddr);
4172
4173 if (dump_file && (dump_flags & TDF_DETAILS))
4174 {
4175 fprintf (dump_file, "(compute_affine_dependence\n");
4176 fprintf (dump_file, " stmt_a: ");
4177 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4178 fprintf (dump_file, " stmt_b: ");
4179 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4180 }
4181
4182 /* Analyze only when the dependence relation is not yet known. */
4183 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4184 {
4185 dependence_stats.num_dependence_tests++;
4186
4187 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4188 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4189 {
4190 subscript_dependence_tester (ddr, loop_nest);
4191
4192 if (flag_check_data_deps)
4193 {
4194 /* Dump the dependences from the first algorithm. */
4195 if (dump_file && (dump_flags & TDF_DETAILS))
4196 {
4197 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4198 dump_data_dependence_relation (dump_file, ddr);
4199 }
4200
4201 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4202 {
4203 bool maybe_dependent;
4204 vec<lambda_vector> dir_vects, dist_vects;
4205
4206 /* Save the result of the first DD analyzer. */
4207 dist_vects = DDR_DIST_VECTS (ddr);
4208 dir_vects = DDR_DIR_VECTS (ddr);
4209
4210 /* Reset the information. */
4211 DDR_DIST_VECTS (ddr).create (0);
4212 DDR_DIR_VECTS (ddr).create (0);
4213
4214 /* Compute the same information using Omega. */
4215 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4216 goto csys_dont_know;
4217
4218 if (dump_file && (dump_flags & TDF_DETAILS))
4219 {
4220 fprintf (dump_file, "Omega Analyzer\n");
4221 dump_data_dependence_relation (dump_file, ddr);
4222 }
4223
4224 /* Check that we get the same information. */
4225 if (maybe_dependent)
4226 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4227 dir_vects));
4228 }
4229 }
4230 }
4231
4232 /* As a last case, if the dependence cannot be determined, or if
4233 the dependence is considered too difficult to determine, answer
4234 "don't know". */
4235 else
4236 {
4237 csys_dont_know:;
4238 dependence_stats.num_dependence_undetermined++;
4239
4240 if (dump_file && (dump_flags & TDF_DETAILS))
4241 {
4242 fprintf (dump_file, "Data ref a:\n");
4243 dump_data_reference (dump_file, dra);
4244 fprintf (dump_file, "Data ref b:\n");
4245 dump_data_reference (dump_file, drb);
4246 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4247 }
4248 finalize_ddr_dependent (ddr, chrec_dont_know);
4249 }
4250 }
4251
4252 if (dump_file && (dump_flags & TDF_DETAILS))
4253 {
4254 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4255 fprintf (dump_file, ") -> no dependence\n");
4256 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4257 fprintf (dump_file, ") -> dependence analysis failed\n");
4258 else
4259 fprintf (dump_file, ")\n");
4260 }
4261 }
4262
4263 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4264 the data references in DATAREFS, in the LOOP_NEST. When
4265 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4266 relations. Return true when successful, i.e. data references number
4267 is small enough to be handled. */
4268
4269 bool
4270 compute_all_dependences (vec<data_reference_p> datarefs,
4271 vec<ddr_p> *dependence_relations,
4272 vec<loop_p> loop_nest,
4273 bool compute_self_and_rr)
4274 {
4275 struct data_dependence_relation *ddr;
4276 struct data_reference *a, *b;
4277 unsigned int i, j;
4278
4279 if ((int) datarefs.length ()
4280 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4281 {
4282 struct data_dependence_relation *ddr;
4283
4284 /* Insert a single relation into dependence_relations:
4285 chrec_dont_know. */
4286 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4287 dependence_relations->safe_push (ddr);
4288 return false;
4289 }
4290
4291 FOR_EACH_VEC_ELT (datarefs, i, a)
4292 for (j = i + 1; datarefs.iterate (j, &b); j++)
4293 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4294 {
4295 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4296 dependence_relations->safe_push (ddr);
4297 if (loop_nest.exists ())
4298 compute_affine_dependence (ddr, loop_nest[0]);
4299 }
4300
4301 if (compute_self_and_rr)
4302 FOR_EACH_VEC_ELT (datarefs, i, a)
4303 {
4304 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4305 dependence_relations->safe_push (ddr);
4306 if (loop_nest.exists ())
4307 compute_affine_dependence (ddr, loop_nest[0]);
4308 }
4309
4310 return true;
4311 }
4312
4313 /* Describes a location of a memory reference. */
4314
4315 typedef struct data_ref_loc_d
4316 {
4317 /* Position of the memory reference. */
4318 tree *pos;
4319
4320 /* True if the memory reference is read. */
4321 bool is_read;
4322 } data_ref_loc;
4323
4324
4325 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4326 true if STMT clobbers memory, false otherwise. */
4327
4328 static bool
4329 get_references_in_stmt (gimple stmt, vec<data_ref_loc, va_heap> *references)
4330 {
4331 bool clobbers_memory = false;
4332 data_ref_loc ref;
4333 tree *op0, *op1;
4334 enum gimple_code stmt_code = gimple_code (stmt);
4335
4336 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4337 As we cannot model data-references to not spelled out
4338 accesses give up if they may occur. */
4339 if (stmt_code == GIMPLE_CALL
4340 && !(gimple_call_flags (stmt) & ECF_CONST))
4341 {
4342 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4343 if (gimple_call_internal_p (stmt)
4344 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
4345 {
4346 struct loop *loop = gimple_bb (stmt)->loop_father;
4347 tree uid = gimple_call_arg (stmt, 0);
4348 gcc_assert (TREE_CODE (uid) == SSA_NAME);
4349 if (loop == NULL
4350 || loop->simduid != SSA_NAME_VAR (uid))
4351 clobbers_memory = true;
4352 }
4353 else
4354 clobbers_memory = true;
4355 }
4356 else if (stmt_code == GIMPLE_ASM
4357 && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt)))
4358 clobbers_memory = true;
4359
4360 if (!gimple_vuse (stmt))
4361 return clobbers_memory;
4362
4363 if (stmt_code == GIMPLE_ASSIGN)
4364 {
4365 tree base;
4366 op0 = gimple_assign_lhs_ptr (stmt);
4367 op1 = gimple_assign_rhs1_ptr (stmt);
4368
4369 if (DECL_P (*op1)
4370 || (REFERENCE_CLASS_P (*op1)
4371 && (base = get_base_address (*op1))
4372 && TREE_CODE (base) != SSA_NAME))
4373 {
4374 ref.pos = op1;
4375 ref.is_read = true;
4376 references->safe_push (ref);
4377 }
4378 }
4379 else if (stmt_code == GIMPLE_CALL)
4380 {
4381 unsigned i, n;
4382
4383 op0 = gimple_call_lhs_ptr (stmt);
4384 n = gimple_call_num_args (stmt);
4385 for (i = 0; i < n; i++)
4386 {
4387 op1 = gimple_call_arg_ptr (stmt, i);
4388
4389 if (DECL_P (*op1)
4390 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4391 {
4392 ref.pos = op1;
4393 ref.is_read = true;
4394 references->safe_push (ref);
4395 }
4396 }
4397 }
4398 else
4399 return clobbers_memory;
4400
4401 if (*op0
4402 && (DECL_P (*op0)
4403 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4404 {
4405 ref.pos = op0;
4406 ref.is_read = false;
4407 references->safe_push (ref);
4408 }
4409 return clobbers_memory;
4410 }
4411
4412 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4413 reference, returns false, otherwise returns true. NEST is the outermost
4414 loop of the loop nest in which the references should be analyzed. */
4415
4416 bool
4417 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4418 vec<data_reference_p> *datarefs)
4419 {
4420 unsigned i;
4421 stack_vec<data_ref_loc, 2> references;
4422 data_ref_loc *ref;
4423 bool ret = true;
4424 data_reference_p dr;
4425
4426 if (get_references_in_stmt (stmt, &references))
4427 return false;
4428
4429 FOR_EACH_VEC_ELT (references, i, ref)
4430 {
4431 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4432 *ref->pos, stmt, ref->is_read);
4433 gcc_assert (dr != NULL);
4434 datarefs->safe_push (dr);
4435 }
4436 references.release ();
4437 return ret;
4438 }
4439
4440 /* Stores the data references in STMT to DATAREFS. If there is an
4441 unanalyzable reference, returns false, otherwise returns true.
4442 NEST is the outermost loop of the loop nest in which the references
4443 should be instantiated, LOOP is the loop in which the references
4444 should be analyzed. */
4445
4446 bool
4447 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4448 vec<data_reference_p> *datarefs)
4449 {
4450 unsigned i;
4451 stack_vec<data_ref_loc, 2> references;
4452 data_ref_loc *ref;
4453 bool ret = true;
4454 data_reference_p dr;
4455
4456 if (get_references_in_stmt (stmt, &references))
4457 return false;
4458
4459 FOR_EACH_VEC_ELT (references, i, ref)
4460 {
4461 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4462 gcc_assert (dr != NULL);
4463 datarefs->safe_push (dr);
4464 }
4465
4466 references.release ();
4467 return ret;
4468 }
4469
4470 /* Search the data references in LOOP, and record the information into
4471 DATAREFS. Returns chrec_dont_know when failing to analyze a
4472 difficult case, returns NULL_TREE otherwise. */
4473
4474 tree
4475 find_data_references_in_bb (struct loop *loop, basic_block bb,
4476 vec<data_reference_p> *datarefs)
4477 {
4478 gimple_stmt_iterator bsi;
4479
4480 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4481 {
4482 gimple stmt = gsi_stmt (bsi);
4483
4484 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4485 {
4486 struct data_reference *res;
4487 res = XCNEW (struct data_reference);
4488 datarefs->safe_push (res);
4489
4490 return chrec_dont_know;
4491 }
4492 }
4493
4494 return NULL_TREE;
4495 }
4496
4497 /* Search the data references in LOOP, and record the information into
4498 DATAREFS. Returns chrec_dont_know when failing to analyze a
4499 difficult case, returns NULL_TREE otherwise.
4500
4501 TODO: This function should be made smarter so that it can handle address
4502 arithmetic as if they were array accesses, etc. */
4503
4504 tree
4505 find_data_references_in_loop (struct loop *loop,
4506 vec<data_reference_p> *datarefs)
4507 {
4508 basic_block bb, *bbs;
4509 unsigned int i;
4510
4511 bbs = get_loop_body_in_dom_order (loop);
4512
4513 for (i = 0; i < loop->num_nodes; i++)
4514 {
4515 bb = bbs[i];
4516
4517 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4518 {
4519 free (bbs);
4520 return chrec_dont_know;
4521 }
4522 }
4523 free (bbs);
4524
4525 return NULL_TREE;
4526 }
4527
4528 /* Recursive helper function. */
4529
4530 static bool
4531 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4532 {
4533 /* Inner loops of the nest should not contain siblings. Example:
4534 when there are two consecutive loops,
4535
4536 | loop_0
4537 | loop_1
4538 | A[{0, +, 1}_1]
4539 | endloop_1
4540 | loop_2
4541 | A[{0, +, 1}_2]
4542 | endloop_2
4543 | endloop_0
4544
4545 the dependence relation cannot be captured by the distance
4546 abstraction. */
4547 if (loop->next)
4548 return false;
4549
4550 loop_nest->safe_push (loop);
4551 if (loop->inner)
4552 return find_loop_nest_1 (loop->inner, loop_nest);
4553 return true;
4554 }
4555
4556 /* Return false when the LOOP is not well nested. Otherwise return
4557 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4558 contain the loops from the outermost to the innermost, as they will
4559 appear in the classic distance vector. */
4560
4561 bool
4562 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4563 {
4564 loop_nest->safe_push (loop);
4565 if (loop->inner)
4566 return find_loop_nest_1 (loop->inner, loop_nest);
4567 return true;
4568 }
4569
4570 /* Returns true when the data dependences have been computed, false otherwise.
4571 Given a loop nest LOOP, the following vectors are returned:
4572 DATAREFS is initialized to all the array elements contained in this loop,
4573 DEPENDENCE_RELATIONS contains the relations between the data references.
4574 Compute read-read and self relations if
4575 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4576
4577 bool
4578 compute_data_dependences_for_loop (struct loop *loop,
4579 bool compute_self_and_read_read_dependences,
4580 vec<loop_p> *loop_nest,
4581 vec<data_reference_p> *datarefs,
4582 vec<ddr_p> *dependence_relations)
4583 {
4584 bool res = true;
4585
4586 memset (&dependence_stats, 0, sizeof (dependence_stats));
4587
4588 /* If the loop nest is not well formed, or one of the data references
4589 is not computable, give up without spending time to compute other
4590 dependences. */
4591 if (!loop
4592 || !find_loop_nest (loop, loop_nest)
4593 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4594 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4595 compute_self_and_read_read_dependences))
4596 res = false;
4597
4598 if (dump_file && (dump_flags & TDF_STATS))
4599 {
4600 fprintf (dump_file, "Dependence tester statistics:\n");
4601
4602 fprintf (dump_file, "Number of dependence tests: %d\n",
4603 dependence_stats.num_dependence_tests);
4604 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4605 dependence_stats.num_dependence_dependent);
4606 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4607 dependence_stats.num_dependence_independent);
4608 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4609 dependence_stats.num_dependence_undetermined);
4610
4611 fprintf (dump_file, "Number of subscript tests: %d\n",
4612 dependence_stats.num_subscript_tests);
4613 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4614 dependence_stats.num_subscript_undetermined);
4615 fprintf (dump_file, "Number of same subscript function: %d\n",
4616 dependence_stats.num_same_subscript_function);
4617
4618 fprintf (dump_file, "Number of ziv tests: %d\n",
4619 dependence_stats.num_ziv);
4620 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4621 dependence_stats.num_ziv_dependent);
4622 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4623 dependence_stats.num_ziv_independent);
4624 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4625 dependence_stats.num_ziv_unimplemented);
4626
4627 fprintf (dump_file, "Number of siv tests: %d\n",
4628 dependence_stats.num_siv);
4629 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4630 dependence_stats.num_siv_dependent);
4631 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4632 dependence_stats.num_siv_independent);
4633 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4634 dependence_stats.num_siv_unimplemented);
4635
4636 fprintf (dump_file, "Number of miv tests: %d\n",
4637 dependence_stats.num_miv);
4638 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4639 dependence_stats.num_miv_dependent);
4640 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4641 dependence_stats.num_miv_independent);
4642 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4643 dependence_stats.num_miv_unimplemented);
4644 }
4645
4646 return res;
4647 }
4648
4649 /* Returns true when the data dependences for the basic block BB have been
4650 computed, false otherwise.
4651 DATAREFS is initialized to all the array elements contained in this basic
4652 block, DEPENDENCE_RELATIONS contains the relations between the data
4653 references. Compute read-read and self relations if
4654 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4655 bool
4656 compute_data_dependences_for_bb (basic_block bb,
4657 bool compute_self_and_read_read_dependences,
4658 vec<data_reference_p> *datarefs,
4659 vec<ddr_p> *dependence_relations)
4660 {
4661 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4662 return false;
4663
4664 return compute_all_dependences (*datarefs, dependence_relations, vNULL,
4665 compute_self_and_read_read_dependences);
4666 }
4667
4668 /* Entry point (for testing only). Analyze all the data references
4669 and the dependence relations in LOOP.
4670
4671 The data references are computed first.
4672
4673 A relation on these nodes is represented by a complete graph. Some
4674 of the relations could be of no interest, thus the relations can be
4675 computed on demand.
4676
4677 In the following function we compute all the relations. This is
4678 just a first implementation that is here for:
4679 - for showing how to ask for the dependence relations,
4680 - for the debugging the whole dependence graph,
4681 - for the dejagnu testcases and maintenance.
4682
4683 It is possible to ask only for a part of the graph, avoiding to
4684 compute the whole dependence graph. The computed dependences are
4685 stored in a knowledge base (KB) such that later queries don't
4686 recompute the same information. The implementation of this KB is
4687 transparent to the optimizer, and thus the KB can be changed with a
4688 more efficient implementation, or the KB could be disabled. */
4689 static void
4690 analyze_all_data_dependences (struct loop *loop)
4691 {
4692 unsigned int i;
4693 int nb_data_refs = 10;
4694 vec<data_reference_p> datarefs;
4695 datarefs.create (nb_data_refs);
4696 vec<ddr_p> dependence_relations;
4697 dependence_relations.create (nb_data_refs * nb_data_refs);
4698 vec<loop_p> loop_nest;
4699 loop_nest.create (3);
4700
4701 /* Compute DDs on the whole function. */
4702 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4703 &dependence_relations);
4704
4705 if (dump_file)
4706 {
4707 dump_data_dependence_relations (dump_file, dependence_relations);
4708 fprintf (dump_file, "\n\n");
4709
4710 if (dump_flags & TDF_DETAILS)
4711 dump_dist_dir_vectors (dump_file, dependence_relations);
4712
4713 if (dump_flags & TDF_STATS)
4714 {
4715 unsigned nb_top_relations = 0;
4716 unsigned nb_bot_relations = 0;
4717 unsigned nb_chrec_relations = 0;
4718 struct data_dependence_relation *ddr;
4719
4720 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4721 {
4722 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4723 nb_top_relations++;
4724
4725 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4726 nb_bot_relations++;
4727
4728 else
4729 nb_chrec_relations++;
4730 }
4731
4732 gather_stats_on_scev_database ();
4733 }
4734 }
4735
4736 loop_nest.release ();
4737 free_dependence_relations (dependence_relations);
4738 free_data_refs (datarefs);
4739 }
4740
4741 /* Computes all the data dependences and check that the results of
4742 several analyzers are the same. */
4743
4744 void
4745 tree_check_data_deps (void)
4746 {
4747 loop_iterator li;
4748 struct loop *loop_nest;
4749
4750 FOR_EACH_LOOP (li, loop_nest, 0)
4751 analyze_all_data_dependences (loop_nest);
4752 }
4753
4754 /* Free the memory used by a data dependence relation DDR. */
4755
4756 void
4757 free_dependence_relation (struct data_dependence_relation *ddr)
4758 {
4759 if (ddr == NULL)
4760 return;
4761
4762 if (DDR_SUBSCRIPTS (ddr).exists ())
4763 free_subscripts (DDR_SUBSCRIPTS (ddr));
4764 DDR_DIST_VECTS (ddr).release ();
4765 DDR_DIR_VECTS (ddr).release ();
4766
4767 free (ddr);
4768 }
4769
4770 /* Free the memory used by the data dependence relations from
4771 DEPENDENCE_RELATIONS. */
4772
4773 void
4774 free_dependence_relations (vec<ddr_p> dependence_relations)
4775 {
4776 unsigned int i;
4777 struct data_dependence_relation *ddr;
4778
4779 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4780 if (ddr)
4781 free_dependence_relation (ddr);
4782
4783 dependence_relations.release ();
4784 }
4785
4786 /* Free the memory used by the data references from DATAREFS. */
4787
4788 void
4789 free_data_refs (vec<data_reference_p> datarefs)
4790 {
4791 unsigned int i;
4792 struct data_reference *dr;
4793
4794 FOR_EACH_VEC_ELT (datarefs, i, dr)
4795 free_data_ref (dr);
4796 datarefs.release ();
4797 }