coretypes.h: Include machmode.h...
[gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
24
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
30
31 The goals of this analysis are:
32
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
36
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
39
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
45
46 - to define a knowledge base for storing the data dependence
47 information,
48
49 - to define an interface to access this data.
50
51
52 Definitions:
53
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
58
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
63
64 References:
65
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
69
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
72
73
74 */
75
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "hash-set.h"
80 #include "vec.h"
81 #include "input.h"
82 #include "alias.h"
83 #include "symtab.h"
84 #include "options.h"
85 #include "inchash.h"
86 #include "tree.h"
87 #include "fold-const.h"
88 #include "hashtab.h"
89 #include "tm.h"
90 #include "hard-reg-set.h"
91 #include "function.h"
92 #include "rtl.h"
93 #include "flags.h"
94 #include "statistics.h"
95 #include "insn-config.h"
96 #include "expmed.h"
97 #include "dojump.h"
98 #include "explow.h"
99 #include "calls.h"
100 #include "emit-rtl.h"
101 #include "varasm.h"
102 #include "stmt.h"
103 #include "expr.h"
104 #include "gimple-pretty-print.h"
105 #include "predict.h"
106 #include "dominance.h"
107 #include "cfg.h"
108 #include "basic-block.h"
109 #include "tree-ssa-alias.h"
110 #include "internal-fn.h"
111 #include "gimple-expr.h"
112 #include "is-a.h"
113 #include "gimple.h"
114 #include "gimple-iterator.h"
115 #include "tree-ssa-loop-niter.h"
116 #include "tree-ssa-loop.h"
117 #include "tree-ssa.h"
118 #include "cfgloop.h"
119 #include "tree-data-ref.h"
120 #include "tree-scalar-evolution.h"
121 #include "dumpfile.h"
122 #include "langhooks.h"
123 #include "tree-affine.h"
124 #include "params.h"
125
126 static struct datadep_stats
127 {
128 int num_dependence_tests;
129 int num_dependence_dependent;
130 int num_dependence_independent;
131 int num_dependence_undetermined;
132
133 int num_subscript_tests;
134 int num_subscript_undetermined;
135 int num_same_subscript_function;
136
137 int num_ziv;
138 int num_ziv_independent;
139 int num_ziv_dependent;
140 int num_ziv_unimplemented;
141
142 int num_siv;
143 int num_siv_independent;
144 int num_siv_dependent;
145 int num_siv_unimplemented;
146
147 int num_miv;
148 int num_miv_independent;
149 int num_miv_dependent;
150 int num_miv_unimplemented;
151 } dependence_stats;
152
153 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
154 struct data_reference *,
155 struct data_reference *,
156 struct loop *);
157 /* Returns true iff A divides B. */
158
159 static inline bool
160 tree_fold_divides_p (const_tree a, const_tree b)
161 {
162 gcc_assert (TREE_CODE (a) == INTEGER_CST);
163 gcc_assert (TREE_CODE (b) == INTEGER_CST);
164 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
165 }
166
167 /* Returns true iff A divides B. */
168
169 static inline bool
170 int_divides_p (int a, int b)
171 {
172 return ((b % a) == 0);
173 }
174
175 \f
176
177 /* Dump into FILE all the data references from DATAREFS. */
178
179 static void
180 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
181 {
182 unsigned int i;
183 struct data_reference *dr;
184
185 FOR_EACH_VEC_ELT (datarefs, i, dr)
186 dump_data_reference (file, dr);
187 }
188
189 /* Unified dump into FILE all the data references from DATAREFS. */
190
191 DEBUG_FUNCTION void
192 debug (vec<data_reference_p> &ref)
193 {
194 dump_data_references (stderr, ref);
195 }
196
197 DEBUG_FUNCTION void
198 debug (vec<data_reference_p> *ptr)
199 {
200 if (ptr)
201 debug (*ptr);
202 else
203 fprintf (stderr, "<nil>\n");
204 }
205
206
207 /* Dump into STDERR all the data references from DATAREFS. */
208
209 DEBUG_FUNCTION void
210 debug_data_references (vec<data_reference_p> datarefs)
211 {
212 dump_data_references (stderr, datarefs);
213 }
214
215 /* Print to STDERR the data_reference DR. */
216
217 DEBUG_FUNCTION void
218 debug_data_reference (struct data_reference *dr)
219 {
220 dump_data_reference (stderr, dr);
221 }
222
223 /* Dump function for a DATA_REFERENCE structure. */
224
225 void
226 dump_data_reference (FILE *outf,
227 struct data_reference *dr)
228 {
229 unsigned int i;
230
231 fprintf (outf, "#(Data Ref: \n");
232 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
233 fprintf (outf, "# stmt: ");
234 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
235 fprintf (outf, "# ref: ");
236 print_generic_stmt (outf, DR_REF (dr), 0);
237 fprintf (outf, "# base_object: ");
238 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
239
240 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
241 {
242 fprintf (outf, "# Access function %d: ", i);
243 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
244 }
245 fprintf (outf, "#)\n");
246 }
247
248 /* Unified dump function for a DATA_REFERENCE structure. */
249
250 DEBUG_FUNCTION void
251 debug (data_reference &ref)
252 {
253 dump_data_reference (stderr, &ref);
254 }
255
256 DEBUG_FUNCTION void
257 debug (data_reference *ptr)
258 {
259 if (ptr)
260 debug (*ptr);
261 else
262 fprintf (stderr, "<nil>\n");
263 }
264
265
266 /* Dumps the affine function described by FN to the file OUTF. */
267
268 static void
269 dump_affine_function (FILE *outf, affine_fn fn)
270 {
271 unsigned i;
272 tree coef;
273
274 print_generic_expr (outf, fn[0], TDF_SLIM);
275 for (i = 1; fn.iterate (i, &coef); i++)
276 {
277 fprintf (outf, " + ");
278 print_generic_expr (outf, coef, TDF_SLIM);
279 fprintf (outf, " * x_%u", i);
280 }
281 }
282
283 /* Dumps the conflict function CF to the file OUTF. */
284
285 static void
286 dump_conflict_function (FILE *outf, conflict_function *cf)
287 {
288 unsigned i;
289
290 if (cf->n == NO_DEPENDENCE)
291 fprintf (outf, "no dependence");
292 else if (cf->n == NOT_KNOWN)
293 fprintf (outf, "not known");
294 else
295 {
296 for (i = 0; i < cf->n; i++)
297 {
298 if (i != 0)
299 fprintf (outf, " ");
300 fprintf (outf, "[");
301 dump_affine_function (outf, cf->fns[i]);
302 fprintf (outf, "]");
303 }
304 }
305 }
306
307 /* Dump function for a SUBSCRIPT structure. */
308
309 static void
310 dump_subscript (FILE *outf, struct subscript *subscript)
311 {
312 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
313
314 fprintf (outf, "\n (subscript \n");
315 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
316 dump_conflict_function (outf, cf);
317 if (CF_NONTRIVIAL_P (cf))
318 {
319 tree last_iteration = SUB_LAST_CONFLICT (subscript);
320 fprintf (outf, "\n last_conflict: ");
321 print_generic_expr (outf, last_iteration, 0);
322 }
323
324 cf = SUB_CONFLICTS_IN_B (subscript);
325 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
326 dump_conflict_function (outf, cf);
327 if (CF_NONTRIVIAL_P (cf))
328 {
329 tree last_iteration = SUB_LAST_CONFLICT (subscript);
330 fprintf (outf, "\n last_conflict: ");
331 print_generic_expr (outf, last_iteration, 0);
332 }
333
334 fprintf (outf, "\n (Subscript distance: ");
335 print_generic_expr (outf, SUB_DISTANCE (subscript), 0);
336 fprintf (outf, " ))\n");
337 }
338
339 /* Print the classic direction vector DIRV to OUTF. */
340
341 static void
342 print_direction_vector (FILE *outf,
343 lambda_vector dirv,
344 int length)
345 {
346 int eq;
347
348 for (eq = 0; eq < length; eq++)
349 {
350 enum data_dependence_direction dir = ((enum data_dependence_direction)
351 dirv[eq]);
352
353 switch (dir)
354 {
355 case dir_positive:
356 fprintf (outf, " +");
357 break;
358 case dir_negative:
359 fprintf (outf, " -");
360 break;
361 case dir_equal:
362 fprintf (outf, " =");
363 break;
364 case dir_positive_or_equal:
365 fprintf (outf, " +=");
366 break;
367 case dir_positive_or_negative:
368 fprintf (outf, " +-");
369 break;
370 case dir_negative_or_equal:
371 fprintf (outf, " -=");
372 break;
373 case dir_star:
374 fprintf (outf, " *");
375 break;
376 default:
377 fprintf (outf, "indep");
378 break;
379 }
380 }
381 fprintf (outf, "\n");
382 }
383
384 /* Print a vector of direction vectors. */
385
386 static void
387 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
388 int length)
389 {
390 unsigned j;
391 lambda_vector v;
392
393 FOR_EACH_VEC_ELT (dir_vects, j, v)
394 print_direction_vector (outf, v, length);
395 }
396
397 /* Print out a vector VEC of length N to OUTFILE. */
398
399 static inline void
400 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
401 {
402 int i;
403
404 for (i = 0; i < n; i++)
405 fprintf (outfile, "%3d ", vector[i]);
406 fprintf (outfile, "\n");
407 }
408
409 /* Print a vector of distance vectors. */
410
411 static void
412 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
413 int length)
414 {
415 unsigned j;
416 lambda_vector v;
417
418 FOR_EACH_VEC_ELT (dist_vects, j, v)
419 print_lambda_vector (outf, v, length);
420 }
421
422 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
423
424 static void
425 dump_data_dependence_relation (FILE *outf,
426 struct data_dependence_relation *ddr)
427 {
428 struct data_reference *dra, *drb;
429
430 fprintf (outf, "(Data Dep: \n");
431
432 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
433 {
434 if (ddr)
435 {
436 dra = DDR_A (ddr);
437 drb = DDR_B (ddr);
438 if (dra)
439 dump_data_reference (outf, dra);
440 else
441 fprintf (outf, " (nil)\n");
442 if (drb)
443 dump_data_reference (outf, drb);
444 else
445 fprintf (outf, " (nil)\n");
446 }
447 fprintf (outf, " (don't know)\n)\n");
448 return;
449 }
450
451 dra = DDR_A (ddr);
452 drb = DDR_B (ddr);
453 dump_data_reference (outf, dra);
454 dump_data_reference (outf, drb);
455
456 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
457 fprintf (outf, " (no dependence)\n");
458
459 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
460 {
461 unsigned int i;
462 struct loop *loopi;
463
464 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
465 {
466 fprintf (outf, " access_fn_A: ");
467 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
468 fprintf (outf, " access_fn_B: ");
469 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
470 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
471 }
472
473 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
474 fprintf (outf, " loop nest: (");
475 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
476 fprintf (outf, "%d ", loopi->num);
477 fprintf (outf, ")\n");
478
479 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
480 {
481 fprintf (outf, " distance_vector: ");
482 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
483 DDR_NB_LOOPS (ddr));
484 }
485
486 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
487 {
488 fprintf (outf, " direction_vector: ");
489 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
490 DDR_NB_LOOPS (ddr));
491 }
492 }
493
494 fprintf (outf, ")\n");
495 }
496
497 /* Debug version. */
498
499 DEBUG_FUNCTION void
500 debug_data_dependence_relation (struct data_dependence_relation *ddr)
501 {
502 dump_data_dependence_relation (stderr, ddr);
503 }
504
505 /* Dump into FILE all the dependence relations from DDRS. */
506
507 void
508 dump_data_dependence_relations (FILE *file,
509 vec<ddr_p> ddrs)
510 {
511 unsigned int i;
512 struct data_dependence_relation *ddr;
513
514 FOR_EACH_VEC_ELT (ddrs, i, ddr)
515 dump_data_dependence_relation (file, ddr);
516 }
517
518 DEBUG_FUNCTION void
519 debug (vec<ddr_p> &ref)
520 {
521 dump_data_dependence_relations (stderr, ref);
522 }
523
524 DEBUG_FUNCTION void
525 debug (vec<ddr_p> *ptr)
526 {
527 if (ptr)
528 debug (*ptr);
529 else
530 fprintf (stderr, "<nil>\n");
531 }
532
533
534 /* Dump to STDERR all the dependence relations from DDRS. */
535
536 DEBUG_FUNCTION void
537 debug_data_dependence_relations (vec<ddr_p> ddrs)
538 {
539 dump_data_dependence_relations (stderr, ddrs);
540 }
541
542 /* Dumps the distance and direction vectors in FILE. DDRS contains
543 the dependence relations, and VECT_SIZE is the size of the
544 dependence vectors, or in other words the number of loops in the
545 considered nest. */
546
547 static void
548 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
549 {
550 unsigned int i, j;
551 struct data_dependence_relation *ddr;
552 lambda_vector v;
553
554 FOR_EACH_VEC_ELT (ddrs, i, ddr)
555 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
556 {
557 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
558 {
559 fprintf (file, "DISTANCE_V (");
560 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
561 fprintf (file, ")\n");
562 }
563
564 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
565 {
566 fprintf (file, "DIRECTION_V (");
567 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
568 fprintf (file, ")\n");
569 }
570 }
571
572 fprintf (file, "\n\n");
573 }
574
575 /* Dumps the data dependence relations DDRS in FILE. */
576
577 static void
578 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
579 {
580 unsigned int i;
581 struct data_dependence_relation *ddr;
582
583 FOR_EACH_VEC_ELT (ddrs, i, ddr)
584 dump_data_dependence_relation (file, ddr);
585
586 fprintf (file, "\n\n");
587 }
588
589 DEBUG_FUNCTION void
590 debug_ddrs (vec<ddr_p> ddrs)
591 {
592 dump_ddrs (stderr, ddrs);
593 }
594
595 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
596 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
597 constant of type ssizetype, and returns true. If we cannot do this
598 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
599 is returned. */
600
601 static bool
602 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
603 tree *var, tree *off)
604 {
605 tree var0, var1;
606 tree off0, off1;
607 enum tree_code ocode = code;
608
609 *var = NULL_TREE;
610 *off = NULL_TREE;
611
612 switch (code)
613 {
614 case INTEGER_CST:
615 *var = build_int_cst (type, 0);
616 *off = fold_convert (ssizetype, op0);
617 return true;
618
619 case POINTER_PLUS_EXPR:
620 ocode = PLUS_EXPR;
621 /* FALLTHROUGH */
622 case PLUS_EXPR:
623 case MINUS_EXPR:
624 split_constant_offset (op0, &var0, &off0);
625 split_constant_offset (op1, &var1, &off1);
626 *var = fold_build2 (code, type, var0, var1);
627 *off = size_binop (ocode, off0, off1);
628 return true;
629
630 case MULT_EXPR:
631 if (TREE_CODE (op1) != INTEGER_CST)
632 return false;
633
634 split_constant_offset (op0, &var0, &off0);
635 *var = fold_build2 (MULT_EXPR, type, var0, op1);
636 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
637 return true;
638
639 case ADDR_EXPR:
640 {
641 tree base, poffset;
642 HOST_WIDE_INT pbitsize, pbitpos;
643 machine_mode pmode;
644 int punsignedp, pvolatilep;
645
646 op0 = TREE_OPERAND (op0, 0);
647 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
648 &pmode, &punsignedp, &pvolatilep, false);
649
650 if (pbitpos % BITS_PER_UNIT != 0)
651 return false;
652 base = build_fold_addr_expr (base);
653 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
654
655 if (poffset)
656 {
657 split_constant_offset (poffset, &poffset, &off1);
658 off0 = size_binop (PLUS_EXPR, off0, off1);
659 if (POINTER_TYPE_P (TREE_TYPE (base)))
660 base = fold_build_pointer_plus (base, poffset);
661 else
662 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
663 fold_convert (TREE_TYPE (base), poffset));
664 }
665
666 var0 = fold_convert (type, base);
667
668 /* If variable length types are involved, punt, otherwise casts
669 might be converted into ARRAY_REFs in gimplify_conversion.
670 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
671 possibly no longer appears in current GIMPLE, might resurface.
672 This perhaps could run
673 if (CONVERT_EXPR_P (var0))
674 {
675 gimplify_conversion (&var0);
676 // Attempt to fill in any within var0 found ARRAY_REF's
677 // element size from corresponding op embedded ARRAY_REF,
678 // if unsuccessful, just punt.
679 } */
680 while (POINTER_TYPE_P (type))
681 type = TREE_TYPE (type);
682 if (int_size_in_bytes (type) < 0)
683 return false;
684
685 *var = var0;
686 *off = off0;
687 return true;
688 }
689
690 case SSA_NAME:
691 {
692 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
693 return false;
694
695 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
696 enum tree_code subcode;
697
698 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
699 return false;
700
701 var0 = gimple_assign_rhs1 (def_stmt);
702 subcode = gimple_assign_rhs_code (def_stmt);
703 var1 = gimple_assign_rhs2 (def_stmt);
704
705 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
706 }
707 CASE_CONVERT:
708 {
709 /* We must not introduce undefined overflow, and we must not change the value.
710 Hence we're okay if the inner type doesn't overflow to start with
711 (pointer or signed), the outer type also is an integer or pointer
712 and the outer precision is at least as large as the inner. */
713 tree itype = TREE_TYPE (op0);
714 if ((POINTER_TYPE_P (itype)
715 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
716 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
717 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
718 {
719 split_constant_offset (op0, &var0, off);
720 *var = fold_convert (type, var0);
721 return true;
722 }
723 return false;
724 }
725
726 default:
727 return false;
728 }
729 }
730
731 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
732 will be ssizetype. */
733
734 void
735 split_constant_offset (tree exp, tree *var, tree *off)
736 {
737 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
738 enum tree_code code;
739
740 *var = exp;
741 *off = ssize_int (0);
742 STRIP_NOPS (exp);
743
744 if (tree_is_chrec (exp)
745 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
746 return;
747
748 otype = TREE_TYPE (exp);
749 code = TREE_CODE (exp);
750 extract_ops_from_tree (exp, &code, &op0, &op1);
751 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
752 {
753 *var = fold_convert (type, e);
754 *off = o;
755 }
756 }
757
758 /* Returns the address ADDR of an object in a canonical shape (without nop
759 casts, and with type of pointer to the object). */
760
761 static tree
762 canonicalize_base_object_address (tree addr)
763 {
764 tree orig = addr;
765
766 STRIP_NOPS (addr);
767
768 /* The base address may be obtained by casting from integer, in that case
769 keep the cast. */
770 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
771 return orig;
772
773 if (TREE_CODE (addr) != ADDR_EXPR)
774 return addr;
775
776 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
777 }
778
779 /* Analyzes the behavior of the memory reference DR in the innermost loop or
780 basic block that contains it. Returns true if analysis succeed or false
781 otherwise. */
782
783 bool
784 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
785 {
786 gimple stmt = DR_STMT (dr);
787 struct loop *loop = loop_containing_stmt (stmt);
788 tree ref = DR_REF (dr);
789 HOST_WIDE_INT pbitsize, pbitpos;
790 tree base, poffset;
791 machine_mode pmode;
792 int punsignedp, pvolatilep;
793 affine_iv base_iv, offset_iv;
794 tree init, dinit, step;
795 bool in_loop = (loop && loop->num);
796
797 if (dump_file && (dump_flags & TDF_DETAILS))
798 fprintf (dump_file, "analyze_innermost: ");
799
800 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
801 &pmode, &punsignedp, &pvolatilep, false);
802 gcc_assert (base != NULL_TREE);
803
804 if (pbitpos % BITS_PER_UNIT != 0)
805 {
806 if (dump_file && (dump_flags & TDF_DETAILS))
807 fprintf (dump_file, "failed: bit offset alignment.\n");
808 return false;
809 }
810
811 if (TREE_CODE (base) == MEM_REF)
812 {
813 if (!integer_zerop (TREE_OPERAND (base, 1)))
814 {
815 offset_int moff = mem_ref_offset (base);
816 tree mofft = wide_int_to_tree (sizetype, moff);
817 if (!poffset)
818 poffset = mofft;
819 else
820 poffset = size_binop (PLUS_EXPR, poffset, mofft);
821 }
822 base = TREE_OPERAND (base, 0);
823 }
824 else
825 base = build_fold_addr_expr (base);
826
827 if (in_loop)
828 {
829 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
830 nest ? true : false))
831 {
832 if (nest)
833 {
834 if (dump_file && (dump_flags & TDF_DETAILS))
835 fprintf (dump_file, "failed: evolution of base is not"
836 " affine.\n");
837 return false;
838 }
839 else
840 {
841 base_iv.base = base;
842 base_iv.step = ssize_int (0);
843 base_iv.no_overflow = true;
844 }
845 }
846 }
847 else
848 {
849 base_iv.base = base;
850 base_iv.step = ssize_int (0);
851 base_iv.no_overflow = true;
852 }
853
854 if (!poffset)
855 {
856 offset_iv.base = ssize_int (0);
857 offset_iv.step = ssize_int (0);
858 }
859 else
860 {
861 if (!in_loop)
862 {
863 offset_iv.base = poffset;
864 offset_iv.step = ssize_int (0);
865 }
866 else if (!simple_iv (loop, loop_containing_stmt (stmt),
867 poffset, &offset_iv,
868 nest ? true : false))
869 {
870 if (nest)
871 {
872 if (dump_file && (dump_flags & TDF_DETAILS))
873 fprintf (dump_file, "failed: evolution of offset is not"
874 " affine.\n");
875 return false;
876 }
877 else
878 {
879 offset_iv.base = poffset;
880 offset_iv.step = ssize_int (0);
881 }
882 }
883 }
884
885 init = ssize_int (pbitpos / BITS_PER_UNIT);
886 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
887 init = size_binop (PLUS_EXPR, init, dinit);
888 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
889 init = size_binop (PLUS_EXPR, init, dinit);
890
891 step = size_binop (PLUS_EXPR,
892 fold_convert (ssizetype, base_iv.step),
893 fold_convert (ssizetype, offset_iv.step));
894
895 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
896
897 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
898 DR_INIT (dr) = init;
899 DR_STEP (dr) = step;
900
901 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
902
903 if (dump_file && (dump_flags & TDF_DETAILS))
904 fprintf (dump_file, "success.\n");
905
906 return true;
907 }
908
909 /* Determines the base object and the list of indices of memory reference
910 DR, analyzed in LOOP and instantiated in loop nest NEST. */
911
912 static void
913 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
914 {
915 vec<tree> access_fns = vNULL;
916 tree ref, op;
917 tree base, off, access_fn;
918 basic_block before_loop;
919
920 /* If analyzing a basic-block there are no indices to analyze
921 and thus no access functions. */
922 if (!nest)
923 {
924 DR_BASE_OBJECT (dr) = DR_REF (dr);
925 DR_ACCESS_FNS (dr).create (0);
926 return;
927 }
928
929 ref = DR_REF (dr);
930 before_loop = block_before_loop (nest);
931
932 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
933 into a two element array with a constant index. The base is
934 then just the immediate underlying object. */
935 if (TREE_CODE (ref) == REALPART_EXPR)
936 {
937 ref = TREE_OPERAND (ref, 0);
938 access_fns.safe_push (integer_zero_node);
939 }
940 else if (TREE_CODE (ref) == IMAGPART_EXPR)
941 {
942 ref = TREE_OPERAND (ref, 0);
943 access_fns.safe_push (integer_one_node);
944 }
945
946 /* Analyze access functions of dimensions we know to be independent. */
947 while (handled_component_p (ref))
948 {
949 if (TREE_CODE (ref) == ARRAY_REF)
950 {
951 op = TREE_OPERAND (ref, 1);
952 access_fn = analyze_scalar_evolution (loop, op);
953 access_fn = instantiate_scev (before_loop, loop, access_fn);
954 access_fns.safe_push (access_fn);
955 }
956 else if (TREE_CODE (ref) == COMPONENT_REF
957 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
958 {
959 /* For COMPONENT_REFs of records (but not unions!) use the
960 FIELD_DECL offset as constant access function so we can
961 disambiguate a[i].f1 and a[i].f2. */
962 tree off = component_ref_field_offset (ref);
963 off = size_binop (PLUS_EXPR,
964 size_binop (MULT_EXPR,
965 fold_convert (bitsizetype, off),
966 bitsize_int (BITS_PER_UNIT)),
967 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
968 access_fns.safe_push (off);
969 }
970 else
971 /* If we have an unhandled component we could not translate
972 to an access function stop analyzing. We have determined
973 our base object in this case. */
974 break;
975
976 ref = TREE_OPERAND (ref, 0);
977 }
978
979 /* If the address operand of a MEM_REF base has an evolution in the
980 analyzed nest, add it as an additional independent access-function. */
981 if (TREE_CODE (ref) == MEM_REF)
982 {
983 op = TREE_OPERAND (ref, 0);
984 access_fn = analyze_scalar_evolution (loop, op);
985 access_fn = instantiate_scev (before_loop, loop, access_fn);
986 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
987 {
988 tree orig_type;
989 tree memoff = TREE_OPERAND (ref, 1);
990 base = initial_condition (access_fn);
991 orig_type = TREE_TYPE (base);
992 STRIP_USELESS_TYPE_CONVERSION (base);
993 split_constant_offset (base, &base, &off);
994 STRIP_USELESS_TYPE_CONVERSION (base);
995 /* Fold the MEM_REF offset into the evolutions initial
996 value to make more bases comparable. */
997 if (!integer_zerop (memoff))
998 {
999 off = size_binop (PLUS_EXPR, off,
1000 fold_convert (ssizetype, memoff));
1001 memoff = build_int_cst (TREE_TYPE (memoff), 0);
1002 }
1003 /* Adjust the offset so it is a multiple of the access type
1004 size and thus we separate bases that can possibly be used
1005 to produce partial overlaps (which the access_fn machinery
1006 cannot handle). */
1007 wide_int rem;
1008 if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1009 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1010 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1011 rem = wi::mod_trunc (off, TYPE_SIZE_UNIT (TREE_TYPE (ref)), SIGNED);
1012 else
1013 /* If we can't compute the remainder simply force the initial
1014 condition to zero. */
1015 rem = off;
1016 off = wide_int_to_tree (ssizetype, wi::sub (off, rem));
1017 memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1018 /* And finally replace the initial condition. */
1019 access_fn = chrec_replace_initial_condition
1020 (access_fn, fold_convert (orig_type, off));
1021 /* ??? This is still not a suitable base object for
1022 dr_may_alias_p - the base object needs to be an
1023 access that covers the object as whole. With
1024 an evolution in the pointer this cannot be
1025 guaranteed.
1026 As a band-aid, mark the access so we can special-case
1027 it in dr_may_alias_p. */
1028 tree old = ref;
1029 ref = fold_build2_loc (EXPR_LOCATION (ref),
1030 MEM_REF, TREE_TYPE (ref),
1031 base, memoff);
1032 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1033 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1034 DR_UNCONSTRAINED_BASE (dr) = true;
1035 access_fns.safe_push (access_fn);
1036 }
1037 }
1038 else if (DECL_P (ref))
1039 {
1040 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1041 ref = build2 (MEM_REF, TREE_TYPE (ref),
1042 build_fold_addr_expr (ref),
1043 build_int_cst (reference_alias_ptr_type (ref), 0));
1044 }
1045
1046 DR_BASE_OBJECT (dr) = ref;
1047 DR_ACCESS_FNS (dr) = access_fns;
1048 }
1049
1050 /* Extracts the alias analysis information from the memory reference DR. */
1051
1052 static void
1053 dr_analyze_alias (struct data_reference *dr)
1054 {
1055 tree ref = DR_REF (dr);
1056 tree base = get_base_address (ref), addr;
1057
1058 if (INDIRECT_REF_P (base)
1059 || TREE_CODE (base) == MEM_REF)
1060 {
1061 addr = TREE_OPERAND (base, 0);
1062 if (TREE_CODE (addr) == SSA_NAME)
1063 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1064 }
1065 }
1066
1067 /* Frees data reference DR. */
1068
1069 void
1070 free_data_ref (data_reference_p dr)
1071 {
1072 DR_ACCESS_FNS (dr).release ();
1073 free (dr);
1074 }
1075
1076 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1077 is read if IS_READ is true, write otherwise. Returns the
1078 data_reference description of MEMREF. NEST is the outermost loop
1079 in which the reference should be instantiated, LOOP is the loop in
1080 which the data reference should be analyzed. */
1081
1082 struct data_reference *
1083 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
1084 bool is_read)
1085 {
1086 struct data_reference *dr;
1087
1088 if (dump_file && (dump_flags & TDF_DETAILS))
1089 {
1090 fprintf (dump_file, "Creating dr for ");
1091 print_generic_expr (dump_file, memref, TDF_SLIM);
1092 fprintf (dump_file, "\n");
1093 }
1094
1095 dr = XCNEW (struct data_reference);
1096 DR_STMT (dr) = stmt;
1097 DR_REF (dr) = memref;
1098 DR_IS_READ (dr) = is_read;
1099
1100 dr_analyze_innermost (dr, nest);
1101 dr_analyze_indices (dr, nest, loop);
1102 dr_analyze_alias (dr);
1103
1104 if (dump_file && (dump_flags & TDF_DETAILS))
1105 {
1106 unsigned i;
1107 fprintf (dump_file, "\tbase_address: ");
1108 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1109 fprintf (dump_file, "\n\toffset from base address: ");
1110 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1111 fprintf (dump_file, "\n\tconstant offset from base address: ");
1112 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1113 fprintf (dump_file, "\n\tstep: ");
1114 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1115 fprintf (dump_file, "\n\taligned to: ");
1116 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1117 fprintf (dump_file, "\n\tbase_object: ");
1118 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1119 fprintf (dump_file, "\n");
1120 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1121 {
1122 fprintf (dump_file, "\tAccess function %d: ", i);
1123 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1124 }
1125 }
1126
1127 return dr;
1128 }
1129
1130 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1131 expressions. */
1132 static bool
1133 dr_equal_offsets_p1 (tree offset1, tree offset2)
1134 {
1135 bool res;
1136
1137 STRIP_NOPS (offset1);
1138 STRIP_NOPS (offset2);
1139
1140 if (offset1 == offset2)
1141 return true;
1142
1143 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1144 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1145 return false;
1146
1147 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1148 TREE_OPERAND (offset2, 0));
1149
1150 if (!res || !BINARY_CLASS_P (offset1))
1151 return res;
1152
1153 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1154 TREE_OPERAND (offset2, 1));
1155
1156 return res;
1157 }
1158
1159 /* Check if DRA and DRB have equal offsets. */
1160 bool
1161 dr_equal_offsets_p (struct data_reference *dra,
1162 struct data_reference *drb)
1163 {
1164 tree offset1, offset2;
1165
1166 offset1 = DR_OFFSET (dra);
1167 offset2 = DR_OFFSET (drb);
1168
1169 return dr_equal_offsets_p1 (offset1, offset2);
1170 }
1171
1172 /* Returns true if FNA == FNB. */
1173
1174 static bool
1175 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1176 {
1177 unsigned i, n = fna.length ();
1178
1179 if (n != fnb.length ())
1180 return false;
1181
1182 for (i = 0; i < n; i++)
1183 if (!operand_equal_p (fna[i], fnb[i], 0))
1184 return false;
1185
1186 return true;
1187 }
1188
1189 /* If all the functions in CF are the same, returns one of them,
1190 otherwise returns NULL. */
1191
1192 static affine_fn
1193 common_affine_function (conflict_function *cf)
1194 {
1195 unsigned i;
1196 affine_fn comm;
1197
1198 if (!CF_NONTRIVIAL_P (cf))
1199 return affine_fn ();
1200
1201 comm = cf->fns[0];
1202
1203 for (i = 1; i < cf->n; i++)
1204 if (!affine_function_equal_p (comm, cf->fns[i]))
1205 return affine_fn ();
1206
1207 return comm;
1208 }
1209
1210 /* Returns the base of the affine function FN. */
1211
1212 static tree
1213 affine_function_base (affine_fn fn)
1214 {
1215 return fn[0];
1216 }
1217
1218 /* Returns true if FN is a constant. */
1219
1220 static bool
1221 affine_function_constant_p (affine_fn fn)
1222 {
1223 unsigned i;
1224 tree coef;
1225
1226 for (i = 1; fn.iterate (i, &coef); i++)
1227 if (!integer_zerop (coef))
1228 return false;
1229
1230 return true;
1231 }
1232
1233 /* Returns true if FN is the zero constant function. */
1234
1235 static bool
1236 affine_function_zero_p (affine_fn fn)
1237 {
1238 return (integer_zerop (affine_function_base (fn))
1239 && affine_function_constant_p (fn));
1240 }
1241
1242 /* Returns a signed integer type with the largest precision from TA
1243 and TB. */
1244
1245 static tree
1246 signed_type_for_types (tree ta, tree tb)
1247 {
1248 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1249 return signed_type_for (ta);
1250 else
1251 return signed_type_for (tb);
1252 }
1253
1254 /* Applies operation OP on affine functions FNA and FNB, and returns the
1255 result. */
1256
1257 static affine_fn
1258 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1259 {
1260 unsigned i, n, m;
1261 affine_fn ret;
1262 tree coef;
1263
1264 if (fnb.length () > fna.length ())
1265 {
1266 n = fna.length ();
1267 m = fnb.length ();
1268 }
1269 else
1270 {
1271 n = fnb.length ();
1272 m = fna.length ();
1273 }
1274
1275 ret.create (m);
1276 for (i = 0; i < n; i++)
1277 {
1278 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1279 TREE_TYPE (fnb[i]));
1280 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1281 }
1282
1283 for (; fna.iterate (i, &coef); i++)
1284 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1285 coef, integer_zero_node));
1286 for (; fnb.iterate (i, &coef); i++)
1287 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1288 integer_zero_node, coef));
1289
1290 return ret;
1291 }
1292
1293 /* Returns the sum of affine functions FNA and FNB. */
1294
1295 static affine_fn
1296 affine_fn_plus (affine_fn fna, affine_fn fnb)
1297 {
1298 return affine_fn_op (PLUS_EXPR, fna, fnb);
1299 }
1300
1301 /* Returns the difference of affine functions FNA and FNB. */
1302
1303 static affine_fn
1304 affine_fn_minus (affine_fn fna, affine_fn fnb)
1305 {
1306 return affine_fn_op (MINUS_EXPR, fna, fnb);
1307 }
1308
1309 /* Frees affine function FN. */
1310
1311 static void
1312 affine_fn_free (affine_fn fn)
1313 {
1314 fn.release ();
1315 }
1316
1317 /* Determine for each subscript in the data dependence relation DDR
1318 the distance. */
1319
1320 static void
1321 compute_subscript_distance (struct data_dependence_relation *ddr)
1322 {
1323 conflict_function *cf_a, *cf_b;
1324 affine_fn fn_a, fn_b, diff;
1325
1326 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1327 {
1328 unsigned int i;
1329
1330 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1331 {
1332 struct subscript *subscript;
1333
1334 subscript = DDR_SUBSCRIPT (ddr, i);
1335 cf_a = SUB_CONFLICTS_IN_A (subscript);
1336 cf_b = SUB_CONFLICTS_IN_B (subscript);
1337
1338 fn_a = common_affine_function (cf_a);
1339 fn_b = common_affine_function (cf_b);
1340 if (!fn_a.exists () || !fn_b.exists ())
1341 {
1342 SUB_DISTANCE (subscript) = chrec_dont_know;
1343 return;
1344 }
1345 diff = affine_fn_minus (fn_a, fn_b);
1346
1347 if (affine_function_constant_p (diff))
1348 SUB_DISTANCE (subscript) = affine_function_base (diff);
1349 else
1350 SUB_DISTANCE (subscript) = chrec_dont_know;
1351
1352 affine_fn_free (diff);
1353 }
1354 }
1355 }
1356
1357 /* Returns the conflict function for "unknown". */
1358
1359 static conflict_function *
1360 conflict_fn_not_known (void)
1361 {
1362 conflict_function *fn = XCNEW (conflict_function);
1363 fn->n = NOT_KNOWN;
1364
1365 return fn;
1366 }
1367
1368 /* Returns the conflict function for "independent". */
1369
1370 static conflict_function *
1371 conflict_fn_no_dependence (void)
1372 {
1373 conflict_function *fn = XCNEW (conflict_function);
1374 fn->n = NO_DEPENDENCE;
1375
1376 return fn;
1377 }
1378
1379 /* Returns true if the address of OBJ is invariant in LOOP. */
1380
1381 static bool
1382 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1383 {
1384 while (handled_component_p (obj))
1385 {
1386 if (TREE_CODE (obj) == ARRAY_REF)
1387 {
1388 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1389 need to check the stride and the lower bound of the reference. */
1390 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1391 loop->num)
1392 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1393 loop->num))
1394 return false;
1395 }
1396 else if (TREE_CODE (obj) == COMPONENT_REF)
1397 {
1398 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1399 loop->num))
1400 return false;
1401 }
1402 obj = TREE_OPERAND (obj, 0);
1403 }
1404
1405 if (!INDIRECT_REF_P (obj)
1406 && TREE_CODE (obj) != MEM_REF)
1407 return true;
1408
1409 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1410 loop->num);
1411 }
1412
1413 /* Returns false if we can prove that data references A and B do not alias,
1414 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1415 considered. */
1416
1417 bool
1418 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1419 bool loop_nest)
1420 {
1421 tree addr_a = DR_BASE_OBJECT (a);
1422 tree addr_b = DR_BASE_OBJECT (b);
1423
1424 /* If we are not processing a loop nest but scalar code we
1425 do not need to care about possible cross-iteration dependences
1426 and thus can process the full original reference. Do so,
1427 similar to how loop invariant motion applies extra offset-based
1428 disambiguation. */
1429 if (!loop_nest)
1430 {
1431 aff_tree off1, off2;
1432 widest_int size1, size2;
1433 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1434 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1435 aff_combination_scale (&off1, -1);
1436 aff_combination_add (&off2, &off1);
1437 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1438 return false;
1439 }
1440
1441 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
1442 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
1443 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
1444 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
1445 return false;
1446
1447 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
1448 do not know the size of the base-object. So we cannot do any
1449 offset/overlap based analysis but have to rely on points-to
1450 information only. */
1451 if (TREE_CODE (addr_a) == MEM_REF
1452 && (DR_UNCONSTRAINED_BASE (a)
1453 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
1454 {
1455 /* For true dependences we can apply TBAA. */
1456 if (flag_strict_aliasing
1457 && DR_IS_WRITE (a) && DR_IS_READ (b)
1458 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1459 get_alias_set (DR_REF (b))))
1460 return false;
1461 if (TREE_CODE (addr_b) == MEM_REF)
1462 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1463 TREE_OPERAND (addr_b, 0));
1464 else
1465 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1466 build_fold_addr_expr (addr_b));
1467 }
1468 else if (TREE_CODE (addr_b) == MEM_REF
1469 && (DR_UNCONSTRAINED_BASE (b)
1470 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
1471 {
1472 /* For true dependences we can apply TBAA. */
1473 if (flag_strict_aliasing
1474 && DR_IS_WRITE (a) && DR_IS_READ (b)
1475 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1476 get_alias_set (DR_REF (b))))
1477 return false;
1478 if (TREE_CODE (addr_a) == MEM_REF)
1479 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1480 TREE_OPERAND (addr_b, 0));
1481 else
1482 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1483 TREE_OPERAND (addr_b, 0));
1484 }
1485
1486 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1487 that is being subsetted in the loop nest. */
1488 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1489 return refs_output_dependent_p (addr_a, addr_b);
1490 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1491 return refs_anti_dependent_p (addr_a, addr_b);
1492 return refs_may_alias_p (addr_a, addr_b);
1493 }
1494
1495 /* Initialize a data dependence relation between data accesses A and
1496 B. NB_LOOPS is the number of loops surrounding the references: the
1497 size of the classic distance/direction vectors. */
1498
1499 struct data_dependence_relation *
1500 initialize_data_dependence_relation (struct data_reference *a,
1501 struct data_reference *b,
1502 vec<loop_p> loop_nest)
1503 {
1504 struct data_dependence_relation *res;
1505 unsigned int i;
1506
1507 res = XNEW (struct data_dependence_relation);
1508 DDR_A (res) = a;
1509 DDR_B (res) = b;
1510 DDR_LOOP_NEST (res).create (0);
1511 DDR_REVERSED_P (res) = false;
1512 DDR_SUBSCRIPTS (res).create (0);
1513 DDR_DIR_VECTS (res).create (0);
1514 DDR_DIST_VECTS (res).create (0);
1515
1516 if (a == NULL || b == NULL)
1517 {
1518 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1519 return res;
1520 }
1521
1522 /* If the data references do not alias, then they are independent. */
1523 if (!dr_may_alias_p (a, b, loop_nest.exists ()))
1524 {
1525 DDR_ARE_DEPENDENT (res) = chrec_known;
1526 return res;
1527 }
1528
1529 /* The case where the references are exactly the same. */
1530 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1531 {
1532 if (loop_nest.exists ()
1533 && !object_address_invariant_in_loop_p (loop_nest[0],
1534 DR_BASE_OBJECT (a)))
1535 {
1536 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1537 return res;
1538 }
1539 DDR_AFFINE_P (res) = true;
1540 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1541 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1542 DDR_LOOP_NEST (res) = loop_nest;
1543 DDR_INNER_LOOP (res) = 0;
1544 DDR_SELF_REFERENCE (res) = true;
1545 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1546 {
1547 struct subscript *subscript;
1548
1549 subscript = XNEW (struct subscript);
1550 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1551 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1552 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1553 SUB_DISTANCE (subscript) = chrec_dont_know;
1554 DDR_SUBSCRIPTS (res).safe_push (subscript);
1555 }
1556 return res;
1557 }
1558
1559 /* If the references do not access the same object, we do not know
1560 whether they alias or not. */
1561 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1562 {
1563 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1564 return res;
1565 }
1566
1567 /* If the base of the object is not invariant in the loop nest, we cannot
1568 analyze it. TODO -- in fact, it would suffice to record that there may
1569 be arbitrary dependences in the loops where the base object varies. */
1570 if (loop_nest.exists ()
1571 && !object_address_invariant_in_loop_p (loop_nest[0],
1572 DR_BASE_OBJECT (a)))
1573 {
1574 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1575 return res;
1576 }
1577
1578 /* If the number of dimensions of the access to not agree we can have
1579 a pointer access to a component of the array element type and an
1580 array access while the base-objects are still the same. Punt. */
1581 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1582 {
1583 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1584 return res;
1585 }
1586
1587 DDR_AFFINE_P (res) = true;
1588 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1589 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1590 DDR_LOOP_NEST (res) = loop_nest;
1591 DDR_INNER_LOOP (res) = 0;
1592 DDR_SELF_REFERENCE (res) = false;
1593
1594 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1595 {
1596 struct subscript *subscript;
1597
1598 subscript = XNEW (struct subscript);
1599 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1600 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1601 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1602 SUB_DISTANCE (subscript) = chrec_dont_know;
1603 DDR_SUBSCRIPTS (res).safe_push (subscript);
1604 }
1605
1606 return res;
1607 }
1608
1609 /* Frees memory used by the conflict function F. */
1610
1611 static void
1612 free_conflict_function (conflict_function *f)
1613 {
1614 unsigned i;
1615
1616 if (CF_NONTRIVIAL_P (f))
1617 {
1618 for (i = 0; i < f->n; i++)
1619 affine_fn_free (f->fns[i]);
1620 }
1621 free (f);
1622 }
1623
1624 /* Frees memory used by SUBSCRIPTS. */
1625
1626 static void
1627 free_subscripts (vec<subscript_p> subscripts)
1628 {
1629 unsigned i;
1630 subscript_p s;
1631
1632 FOR_EACH_VEC_ELT (subscripts, i, s)
1633 {
1634 free_conflict_function (s->conflicting_iterations_in_a);
1635 free_conflict_function (s->conflicting_iterations_in_b);
1636 free (s);
1637 }
1638 subscripts.release ();
1639 }
1640
1641 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1642 description. */
1643
1644 static inline void
1645 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1646 tree chrec)
1647 {
1648 DDR_ARE_DEPENDENT (ddr) = chrec;
1649 free_subscripts (DDR_SUBSCRIPTS (ddr));
1650 DDR_SUBSCRIPTS (ddr).create (0);
1651 }
1652
1653 /* The dependence relation DDR cannot be represented by a distance
1654 vector. */
1655
1656 static inline void
1657 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1658 {
1659 if (dump_file && (dump_flags & TDF_DETAILS))
1660 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1661
1662 DDR_AFFINE_P (ddr) = false;
1663 }
1664
1665 \f
1666
1667 /* This section contains the classic Banerjee tests. */
1668
1669 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1670 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1671
1672 static inline bool
1673 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1674 {
1675 return (evolution_function_is_constant_p (chrec_a)
1676 && evolution_function_is_constant_p (chrec_b));
1677 }
1678
1679 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1680 variable, i.e., if the SIV (Single Index Variable) test is true. */
1681
1682 static bool
1683 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1684 {
1685 if ((evolution_function_is_constant_p (chrec_a)
1686 && evolution_function_is_univariate_p (chrec_b))
1687 || (evolution_function_is_constant_p (chrec_b)
1688 && evolution_function_is_univariate_p (chrec_a)))
1689 return true;
1690
1691 if (evolution_function_is_univariate_p (chrec_a)
1692 && evolution_function_is_univariate_p (chrec_b))
1693 {
1694 switch (TREE_CODE (chrec_a))
1695 {
1696 case POLYNOMIAL_CHREC:
1697 switch (TREE_CODE (chrec_b))
1698 {
1699 case POLYNOMIAL_CHREC:
1700 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1701 return false;
1702
1703 default:
1704 return true;
1705 }
1706
1707 default:
1708 return true;
1709 }
1710 }
1711
1712 return false;
1713 }
1714
1715 /* Creates a conflict function with N dimensions. The affine functions
1716 in each dimension follow. */
1717
1718 static conflict_function *
1719 conflict_fn (unsigned n, ...)
1720 {
1721 unsigned i;
1722 conflict_function *ret = XCNEW (conflict_function);
1723 va_list ap;
1724
1725 gcc_assert (0 < n && n <= MAX_DIM);
1726 va_start (ap, n);
1727
1728 ret->n = n;
1729 for (i = 0; i < n; i++)
1730 ret->fns[i] = va_arg (ap, affine_fn);
1731 va_end (ap);
1732
1733 return ret;
1734 }
1735
1736 /* Returns constant affine function with value CST. */
1737
1738 static affine_fn
1739 affine_fn_cst (tree cst)
1740 {
1741 affine_fn fn;
1742 fn.create (1);
1743 fn.quick_push (cst);
1744 return fn;
1745 }
1746
1747 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1748
1749 static affine_fn
1750 affine_fn_univar (tree cst, unsigned dim, tree coef)
1751 {
1752 affine_fn fn;
1753 fn.create (dim + 1);
1754 unsigned i;
1755
1756 gcc_assert (dim > 0);
1757 fn.quick_push (cst);
1758 for (i = 1; i < dim; i++)
1759 fn.quick_push (integer_zero_node);
1760 fn.quick_push (coef);
1761 return fn;
1762 }
1763
1764 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1765 *OVERLAPS_B are initialized to the functions that describe the
1766 relation between the elements accessed twice by CHREC_A and
1767 CHREC_B. For k >= 0, the following property is verified:
1768
1769 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1770
1771 static void
1772 analyze_ziv_subscript (tree chrec_a,
1773 tree chrec_b,
1774 conflict_function **overlaps_a,
1775 conflict_function **overlaps_b,
1776 tree *last_conflicts)
1777 {
1778 tree type, difference;
1779 dependence_stats.num_ziv++;
1780
1781 if (dump_file && (dump_flags & TDF_DETAILS))
1782 fprintf (dump_file, "(analyze_ziv_subscript \n");
1783
1784 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1785 chrec_a = chrec_convert (type, chrec_a, NULL);
1786 chrec_b = chrec_convert (type, chrec_b, NULL);
1787 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1788
1789 switch (TREE_CODE (difference))
1790 {
1791 case INTEGER_CST:
1792 if (integer_zerop (difference))
1793 {
1794 /* The difference is equal to zero: the accessed index
1795 overlaps for each iteration in the loop. */
1796 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1797 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1798 *last_conflicts = chrec_dont_know;
1799 dependence_stats.num_ziv_dependent++;
1800 }
1801 else
1802 {
1803 /* The accesses do not overlap. */
1804 *overlaps_a = conflict_fn_no_dependence ();
1805 *overlaps_b = conflict_fn_no_dependence ();
1806 *last_conflicts = integer_zero_node;
1807 dependence_stats.num_ziv_independent++;
1808 }
1809 break;
1810
1811 default:
1812 /* We're not sure whether the indexes overlap. For the moment,
1813 conservatively answer "don't know". */
1814 if (dump_file && (dump_flags & TDF_DETAILS))
1815 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1816
1817 *overlaps_a = conflict_fn_not_known ();
1818 *overlaps_b = conflict_fn_not_known ();
1819 *last_conflicts = chrec_dont_know;
1820 dependence_stats.num_ziv_unimplemented++;
1821 break;
1822 }
1823
1824 if (dump_file && (dump_flags & TDF_DETAILS))
1825 fprintf (dump_file, ")\n");
1826 }
1827
1828 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1829 and only if it fits to the int type. If this is not the case, or the
1830 bound on the number of iterations of LOOP could not be derived, returns
1831 chrec_dont_know. */
1832
1833 static tree
1834 max_stmt_executions_tree (struct loop *loop)
1835 {
1836 widest_int nit;
1837
1838 if (!max_stmt_executions (loop, &nit))
1839 return chrec_dont_know;
1840
1841 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
1842 return chrec_dont_know;
1843
1844 return wide_int_to_tree (unsigned_type_node, nit);
1845 }
1846
1847 /* Determine whether the CHREC is always positive/negative. If the expression
1848 cannot be statically analyzed, return false, otherwise set the answer into
1849 VALUE. */
1850
1851 static bool
1852 chrec_is_positive (tree chrec, bool *value)
1853 {
1854 bool value0, value1, value2;
1855 tree end_value, nb_iter;
1856
1857 switch (TREE_CODE (chrec))
1858 {
1859 case POLYNOMIAL_CHREC:
1860 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1861 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1862 return false;
1863
1864 /* FIXME -- overflows. */
1865 if (value0 == value1)
1866 {
1867 *value = value0;
1868 return true;
1869 }
1870
1871 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1872 and the proof consists in showing that the sign never
1873 changes during the execution of the loop, from 0 to
1874 loop->nb_iterations. */
1875 if (!evolution_function_is_affine_p (chrec))
1876 return false;
1877
1878 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1879 if (chrec_contains_undetermined (nb_iter))
1880 return false;
1881
1882 #if 0
1883 /* TODO -- If the test is after the exit, we may decrease the number of
1884 iterations by one. */
1885 if (after_exit)
1886 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1887 #endif
1888
1889 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1890
1891 if (!chrec_is_positive (end_value, &value2))
1892 return false;
1893
1894 *value = value0;
1895 return value0 == value1;
1896
1897 case INTEGER_CST:
1898 switch (tree_int_cst_sgn (chrec))
1899 {
1900 case -1:
1901 *value = false;
1902 break;
1903 case 1:
1904 *value = true;
1905 break;
1906 default:
1907 return false;
1908 }
1909 return true;
1910
1911 default:
1912 return false;
1913 }
1914 }
1915
1916
1917 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1918 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1919 *OVERLAPS_B are initialized to the functions that describe the
1920 relation between the elements accessed twice by CHREC_A and
1921 CHREC_B. For k >= 0, the following property is verified:
1922
1923 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1924
1925 static void
1926 analyze_siv_subscript_cst_affine (tree chrec_a,
1927 tree chrec_b,
1928 conflict_function **overlaps_a,
1929 conflict_function **overlaps_b,
1930 tree *last_conflicts)
1931 {
1932 bool value0, value1, value2;
1933 tree type, difference, tmp;
1934
1935 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1936 chrec_a = chrec_convert (type, chrec_a, NULL);
1937 chrec_b = chrec_convert (type, chrec_b, NULL);
1938 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1939
1940 /* Special case overlap in the first iteration. */
1941 if (integer_zerop (difference))
1942 {
1943 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1944 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1945 *last_conflicts = integer_one_node;
1946 return;
1947 }
1948
1949 if (!chrec_is_positive (initial_condition (difference), &value0))
1950 {
1951 if (dump_file && (dump_flags & TDF_DETAILS))
1952 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1953
1954 dependence_stats.num_siv_unimplemented++;
1955 *overlaps_a = conflict_fn_not_known ();
1956 *overlaps_b = conflict_fn_not_known ();
1957 *last_conflicts = chrec_dont_know;
1958 return;
1959 }
1960 else
1961 {
1962 if (value0 == false)
1963 {
1964 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1965 {
1966 if (dump_file && (dump_flags & TDF_DETAILS))
1967 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1968
1969 *overlaps_a = conflict_fn_not_known ();
1970 *overlaps_b = conflict_fn_not_known ();
1971 *last_conflicts = chrec_dont_know;
1972 dependence_stats.num_siv_unimplemented++;
1973 return;
1974 }
1975 else
1976 {
1977 if (value1 == true)
1978 {
1979 /* Example:
1980 chrec_a = 12
1981 chrec_b = {10, +, 1}
1982 */
1983
1984 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1985 {
1986 HOST_WIDE_INT numiter;
1987 struct loop *loop = get_chrec_loop (chrec_b);
1988
1989 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1990 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1991 fold_build1 (ABS_EXPR, type, difference),
1992 CHREC_RIGHT (chrec_b));
1993 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1994 *last_conflicts = integer_one_node;
1995
1996
1997 /* Perform weak-zero siv test to see if overlap is
1998 outside the loop bounds. */
1999 numiter = max_stmt_executions_int (loop);
2000
2001 if (numiter >= 0
2002 && compare_tree_int (tmp, numiter) > 0)
2003 {
2004 free_conflict_function (*overlaps_a);
2005 free_conflict_function (*overlaps_b);
2006 *overlaps_a = conflict_fn_no_dependence ();
2007 *overlaps_b = conflict_fn_no_dependence ();
2008 *last_conflicts = integer_zero_node;
2009 dependence_stats.num_siv_independent++;
2010 return;
2011 }
2012 dependence_stats.num_siv_dependent++;
2013 return;
2014 }
2015
2016 /* When the step does not divide the difference, there are
2017 no overlaps. */
2018 else
2019 {
2020 *overlaps_a = conflict_fn_no_dependence ();
2021 *overlaps_b = conflict_fn_no_dependence ();
2022 *last_conflicts = integer_zero_node;
2023 dependence_stats.num_siv_independent++;
2024 return;
2025 }
2026 }
2027
2028 else
2029 {
2030 /* Example:
2031 chrec_a = 12
2032 chrec_b = {10, +, -1}
2033
2034 In this case, chrec_a will not overlap with chrec_b. */
2035 *overlaps_a = conflict_fn_no_dependence ();
2036 *overlaps_b = conflict_fn_no_dependence ();
2037 *last_conflicts = integer_zero_node;
2038 dependence_stats.num_siv_independent++;
2039 return;
2040 }
2041 }
2042 }
2043 else
2044 {
2045 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
2046 {
2047 if (dump_file && (dump_flags & TDF_DETAILS))
2048 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2049
2050 *overlaps_a = conflict_fn_not_known ();
2051 *overlaps_b = conflict_fn_not_known ();
2052 *last_conflicts = chrec_dont_know;
2053 dependence_stats.num_siv_unimplemented++;
2054 return;
2055 }
2056 else
2057 {
2058 if (value2 == false)
2059 {
2060 /* Example:
2061 chrec_a = 3
2062 chrec_b = {10, +, -1}
2063 */
2064 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
2065 {
2066 HOST_WIDE_INT numiter;
2067 struct loop *loop = get_chrec_loop (chrec_b);
2068
2069 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2070 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
2071 CHREC_RIGHT (chrec_b));
2072 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
2073 *last_conflicts = integer_one_node;
2074
2075 /* Perform weak-zero siv test to see if overlap is
2076 outside the loop bounds. */
2077 numiter = max_stmt_executions_int (loop);
2078
2079 if (numiter >= 0
2080 && compare_tree_int (tmp, numiter) > 0)
2081 {
2082 free_conflict_function (*overlaps_a);
2083 free_conflict_function (*overlaps_b);
2084 *overlaps_a = conflict_fn_no_dependence ();
2085 *overlaps_b = conflict_fn_no_dependence ();
2086 *last_conflicts = integer_zero_node;
2087 dependence_stats.num_siv_independent++;
2088 return;
2089 }
2090 dependence_stats.num_siv_dependent++;
2091 return;
2092 }
2093
2094 /* When the step does not divide the difference, there
2095 are no overlaps. */
2096 else
2097 {
2098 *overlaps_a = conflict_fn_no_dependence ();
2099 *overlaps_b = conflict_fn_no_dependence ();
2100 *last_conflicts = integer_zero_node;
2101 dependence_stats.num_siv_independent++;
2102 return;
2103 }
2104 }
2105 else
2106 {
2107 /* Example:
2108 chrec_a = 3
2109 chrec_b = {4, +, 1}
2110
2111 In this case, chrec_a will not overlap with chrec_b. */
2112 *overlaps_a = conflict_fn_no_dependence ();
2113 *overlaps_b = conflict_fn_no_dependence ();
2114 *last_conflicts = integer_zero_node;
2115 dependence_stats.num_siv_independent++;
2116 return;
2117 }
2118 }
2119 }
2120 }
2121 }
2122
2123 /* Helper recursive function for initializing the matrix A. Returns
2124 the initial value of CHREC. */
2125
2126 static tree
2127 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2128 {
2129 gcc_assert (chrec);
2130
2131 switch (TREE_CODE (chrec))
2132 {
2133 case POLYNOMIAL_CHREC:
2134 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2135
2136 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2137 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2138
2139 case PLUS_EXPR:
2140 case MULT_EXPR:
2141 case MINUS_EXPR:
2142 {
2143 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2144 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2145
2146 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2147 }
2148
2149 CASE_CONVERT:
2150 {
2151 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2152 return chrec_convert (chrec_type (chrec), op, NULL);
2153 }
2154
2155 case BIT_NOT_EXPR:
2156 {
2157 /* Handle ~X as -1 - X. */
2158 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2159 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2160 build_int_cst (TREE_TYPE (chrec), -1), op);
2161 }
2162
2163 case INTEGER_CST:
2164 return chrec;
2165
2166 default:
2167 gcc_unreachable ();
2168 return NULL_TREE;
2169 }
2170 }
2171
2172 #define FLOOR_DIV(x,y) ((x) / (y))
2173
2174 /* Solves the special case of the Diophantine equation:
2175 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2176
2177 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2178 number of iterations that loops X and Y run. The overlaps will be
2179 constructed as evolutions in dimension DIM. */
2180
2181 static void
2182 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2183 affine_fn *overlaps_a,
2184 affine_fn *overlaps_b,
2185 tree *last_conflicts, int dim)
2186 {
2187 if (((step_a > 0 && step_b > 0)
2188 || (step_a < 0 && step_b < 0)))
2189 {
2190 int step_overlaps_a, step_overlaps_b;
2191 int gcd_steps_a_b, last_conflict, tau2;
2192
2193 gcd_steps_a_b = gcd (step_a, step_b);
2194 step_overlaps_a = step_b / gcd_steps_a_b;
2195 step_overlaps_b = step_a / gcd_steps_a_b;
2196
2197 if (niter > 0)
2198 {
2199 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2200 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2201 last_conflict = tau2;
2202 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2203 }
2204 else
2205 *last_conflicts = chrec_dont_know;
2206
2207 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2208 build_int_cst (NULL_TREE,
2209 step_overlaps_a));
2210 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2211 build_int_cst (NULL_TREE,
2212 step_overlaps_b));
2213 }
2214
2215 else
2216 {
2217 *overlaps_a = affine_fn_cst (integer_zero_node);
2218 *overlaps_b = affine_fn_cst (integer_zero_node);
2219 *last_conflicts = integer_zero_node;
2220 }
2221 }
2222
2223 /* Solves the special case of a Diophantine equation where CHREC_A is
2224 an affine bivariate function, and CHREC_B is an affine univariate
2225 function. For example,
2226
2227 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2228
2229 has the following overlapping functions:
2230
2231 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2232 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2233 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2234
2235 FORNOW: This is a specialized implementation for a case occurring in
2236 a common benchmark. Implement the general algorithm. */
2237
2238 static void
2239 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2240 conflict_function **overlaps_a,
2241 conflict_function **overlaps_b,
2242 tree *last_conflicts)
2243 {
2244 bool xz_p, yz_p, xyz_p;
2245 int step_x, step_y, step_z;
2246 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2247 affine_fn overlaps_a_xz, overlaps_b_xz;
2248 affine_fn overlaps_a_yz, overlaps_b_yz;
2249 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2250 affine_fn ova1, ova2, ovb;
2251 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2252
2253 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2254 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2255 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2256
2257 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2258 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2259 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2260
2261 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2262 {
2263 if (dump_file && (dump_flags & TDF_DETAILS))
2264 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2265
2266 *overlaps_a = conflict_fn_not_known ();
2267 *overlaps_b = conflict_fn_not_known ();
2268 *last_conflicts = chrec_dont_know;
2269 return;
2270 }
2271
2272 niter = MIN (niter_x, niter_z);
2273 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2274 &overlaps_a_xz,
2275 &overlaps_b_xz,
2276 &last_conflicts_xz, 1);
2277 niter = MIN (niter_y, niter_z);
2278 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2279 &overlaps_a_yz,
2280 &overlaps_b_yz,
2281 &last_conflicts_yz, 2);
2282 niter = MIN (niter_x, niter_z);
2283 niter = MIN (niter_y, niter);
2284 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2285 &overlaps_a_xyz,
2286 &overlaps_b_xyz,
2287 &last_conflicts_xyz, 3);
2288
2289 xz_p = !integer_zerop (last_conflicts_xz);
2290 yz_p = !integer_zerop (last_conflicts_yz);
2291 xyz_p = !integer_zerop (last_conflicts_xyz);
2292
2293 if (xz_p || yz_p || xyz_p)
2294 {
2295 ova1 = affine_fn_cst (integer_zero_node);
2296 ova2 = affine_fn_cst (integer_zero_node);
2297 ovb = affine_fn_cst (integer_zero_node);
2298 if (xz_p)
2299 {
2300 affine_fn t0 = ova1;
2301 affine_fn t2 = ovb;
2302
2303 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2304 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2305 affine_fn_free (t0);
2306 affine_fn_free (t2);
2307 *last_conflicts = last_conflicts_xz;
2308 }
2309 if (yz_p)
2310 {
2311 affine_fn t0 = ova2;
2312 affine_fn t2 = ovb;
2313
2314 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2315 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2316 affine_fn_free (t0);
2317 affine_fn_free (t2);
2318 *last_conflicts = last_conflicts_yz;
2319 }
2320 if (xyz_p)
2321 {
2322 affine_fn t0 = ova1;
2323 affine_fn t2 = ova2;
2324 affine_fn t4 = ovb;
2325
2326 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2327 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2328 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2329 affine_fn_free (t0);
2330 affine_fn_free (t2);
2331 affine_fn_free (t4);
2332 *last_conflicts = last_conflicts_xyz;
2333 }
2334 *overlaps_a = conflict_fn (2, ova1, ova2);
2335 *overlaps_b = conflict_fn (1, ovb);
2336 }
2337 else
2338 {
2339 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2340 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2341 *last_conflicts = integer_zero_node;
2342 }
2343
2344 affine_fn_free (overlaps_a_xz);
2345 affine_fn_free (overlaps_b_xz);
2346 affine_fn_free (overlaps_a_yz);
2347 affine_fn_free (overlaps_b_yz);
2348 affine_fn_free (overlaps_a_xyz);
2349 affine_fn_free (overlaps_b_xyz);
2350 }
2351
2352 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2353
2354 static void
2355 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2356 int size)
2357 {
2358 memcpy (vec2, vec1, size * sizeof (*vec1));
2359 }
2360
2361 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2362
2363 static void
2364 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2365 int m, int n)
2366 {
2367 int i;
2368
2369 for (i = 0; i < m; i++)
2370 lambda_vector_copy (mat1[i], mat2[i], n);
2371 }
2372
2373 /* Store the N x N identity matrix in MAT. */
2374
2375 static void
2376 lambda_matrix_id (lambda_matrix mat, int size)
2377 {
2378 int i, j;
2379
2380 for (i = 0; i < size; i++)
2381 for (j = 0; j < size; j++)
2382 mat[i][j] = (i == j) ? 1 : 0;
2383 }
2384
2385 /* Return the first nonzero element of vector VEC1 between START and N.
2386 We must have START <= N. Returns N if VEC1 is the zero vector. */
2387
2388 static int
2389 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2390 {
2391 int j = start;
2392 while (j < n && vec1[j] == 0)
2393 j++;
2394 return j;
2395 }
2396
2397 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2398 R2 = R2 + CONST1 * R1. */
2399
2400 static void
2401 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2402 {
2403 int i;
2404
2405 if (const1 == 0)
2406 return;
2407
2408 for (i = 0; i < n; i++)
2409 mat[r2][i] += const1 * mat[r1][i];
2410 }
2411
2412 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2413 and store the result in VEC2. */
2414
2415 static void
2416 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2417 int size, int const1)
2418 {
2419 int i;
2420
2421 if (const1 == 0)
2422 lambda_vector_clear (vec2, size);
2423 else
2424 for (i = 0; i < size; i++)
2425 vec2[i] = const1 * vec1[i];
2426 }
2427
2428 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2429
2430 static void
2431 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2432 int size)
2433 {
2434 lambda_vector_mult_const (vec1, vec2, size, -1);
2435 }
2436
2437 /* Negate row R1 of matrix MAT which has N columns. */
2438
2439 static void
2440 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2441 {
2442 lambda_vector_negate (mat[r1], mat[r1], n);
2443 }
2444
2445 /* Return true if two vectors are equal. */
2446
2447 static bool
2448 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2449 {
2450 int i;
2451 for (i = 0; i < size; i++)
2452 if (vec1[i] != vec2[i])
2453 return false;
2454 return true;
2455 }
2456
2457 /* Given an M x N integer matrix A, this function determines an M x
2458 M unimodular matrix U, and an M x N echelon matrix S such that
2459 "U.A = S". This decomposition is also known as "right Hermite".
2460
2461 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2462 Restructuring Compilers" Utpal Banerjee. */
2463
2464 static void
2465 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2466 lambda_matrix S, lambda_matrix U)
2467 {
2468 int i, j, i0 = 0;
2469
2470 lambda_matrix_copy (A, S, m, n);
2471 lambda_matrix_id (U, m);
2472
2473 for (j = 0; j < n; j++)
2474 {
2475 if (lambda_vector_first_nz (S[j], m, i0) < m)
2476 {
2477 ++i0;
2478 for (i = m - 1; i >= i0; i--)
2479 {
2480 while (S[i][j] != 0)
2481 {
2482 int sigma, factor, a, b;
2483
2484 a = S[i-1][j];
2485 b = S[i][j];
2486 sigma = (a * b < 0) ? -1: 1;
2487 a = abs (a);
2488 b = abs (b);
2489 factor = sigma * (a / b);
2490
2491 lambda_matrix_row_add (S, n, i, i-1, -factor);
2492 std::swap (S[i], S[i-1]);
2493
2494 lambda_matrix_row_add (U, m, i, i-1, -factor);
2495 std::swap (U[i], U[i-1]);
2496 }
2497 }
2498 }
2499 }
2500 }
2501
2502 /* Determines the overlapping elements due to accesses CHREC_A and
2503 CHREC_B, that are affine functions. This function cannot handle
2504 symbolic evolution functions, ie. when initial conditions are
2505 parameters, because it uses lambda matrices of integers. */
2506
2507 static void
2508 analyze_subscript_affine_affine (tree chrec_a,
2509 tree chrec_b,
2510 conflict_function **overlaps_a,
2511 conflict_function **overlaps_b,
2512 tree *last_conflicts)
2513 {
2514 unsigned nb_vars_a, nb_vars_b, dim;
2515 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2516 lambda_matrix A, U, S;
2517 struct obstack scratch_obstack;
2518
2519 if (eq_evolutions_p (chrec_a, chrec_b))
2520 {
2521 /* The accessed index overlaps for each iteration in the
2522 loop. */
2523 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2524 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2525 *last_conflicts = chrec_dont_know;
2526 return;
2527 }
2528 if (dump_file && (dump_flags & TDF_DETAILS))
2529 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2530
2531 /* For determining the initial intersection, we have to solve a
2532 Diophantine equation. This is the most time consuming part.
2533
2534 For answering to the question: "Is there a dependence?" we have
2535 to prove that there exists a solution to the Diophantine
2536 equation, and that the solution is in the iteration domain,
2537 i.e. the solution is positive or zero, and that the solution
2538 happens before the upper bound loop.nb_iterations. Otherwise
2539 there is no dependence. This function outputs a description of
2540 the iterations that hold the intersections. */
2541
2542 nb_vars_a = nb_vars_in_chrec (chrec_a);
2543 nb_vars_b = nb_vars_in_chrec (chrec_b);
2544
2545 gcc_obstack_init (&scratch_obstack);
2546
2547 dim = nb_vars_a + nb_vars_b;
2548 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2549 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2550 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2551
2552 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2553 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2554 gamma = init_b - init_a;
2555
2556 /* Don't do all the hard work of solving the Diophantine equation
2557 when we already know the solution: for example,
2558 | {3, +, 1}_1
2559 | {3, +, 4}_2
2560 | gamma = 3 - 3 = 0.
2561 Then the first overlap occurs during the first iterations:
2562 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2563 */
2564 if (gamma == 0)
2565 {
2566 if (nb_vars_a == 1 && nb_vars_b == 1)
2567 {
2568 HOST_WIDE_INT step_a, step_b;
2569 HOST_WIDE_INT niter, niter_a, niter_b;
2570 affine_fn ova, ovb;
2571
2572 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2573 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2574 niter = MIN (niter_a, niter_b);
2575 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2576 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2577
2578 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2579 &ova, &ovb,
2580 last_conflicts, 1);
2581 *overlaps_a = conflict_fn (1, ova);
2582 *overlaps_b = conflict_fn (1, ovb);
2583 }
2584
2585 else if (nb_vars_a == 2 && nb_vars_b == 1)
2586 compute_overlap_steps_for_affine_1_2
2587 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2588
2589 else if (nb_vars_a == 1 && nb_vars_b == 2)
2590 compute_overlap_steps_for_affine_1_2
2591 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2592
2593 else
2594 {
2595 if (dump_file && (dump_flags & TDF_DETAILS))
2596 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2597 *overlaps_a = conflict_fn_not_known ();
2598 *overlaps_b = conflict_fn_not_known ();
2599 *last_conflicts = chrec_dont_know;
2600 }
2601 goto end_analyze_subs_aa;
2602 }
2603
2604 /* U.A = S */
2605 lambda_matrix_right_hermite (A, dim, 1, S, U);
2606
2607 if (S[0][0] < 0)
2608 {
2609 S[0][0] *= -1;
2610 lambda_matrix_row_negate (U, dim, 0);
2611 }
2612 gcd_alpha_beta = S[0][0];
2613
2614 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2615 but that is a quite strange case. Instead of ICEing, answer
2616 don't know. */
2617 if (gcd_alpha_beta == 0)
2618 {
2619 *overlaps_a = conflict_fn_not_known ();
2620 *overlaps_b = conflict_fn_not_known ();
2621 *last_conflicts = chrec_dont_know;
2622 goto end_analyze_subs_aa;
2623 }
2624
2625 /* The classic "gcd-test". */
2626 if (!int_divides_p (gcd_alpha_beta, gamma))
2627 {
2628 /* The "gcd-test" has determined that there is no integer
2629 solution, i.e. there is no dependence. */
2630 *overlaps_a = conflict_fn_no_dependence ();
2631 *overlaps_b = conflict_fn_no_dependence ();
2632 *last_conflicts = integer_zero_node;
2633 }
2634
2635 /* Both access functions are univariate. This includes SIV and MIV cases. */
2636 else if (nb_vars_a == 1 && nb_vars_b == 1)
2637 {
2638 /* Both functions should have the same evolution sign. */
2639 if (((A[0][0] > 0 && -A[1][0] > 0)
2640 || (A[0][0] < 0 && -A[1][0] < 0)))
2641 {
2642 /* The solutions are given by:
2643 |
2644 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2645 | [u21 u22] [y0]
2646
2647 For a given integer t. Using the following variables,
2648
2649 | i0 = u11 * gamma / gcd_alpha_beta
2650 | j0 = u12 * gamma / gcd_alpha_beta
2651 | i1 = u21
2652 | j1 = u22
2653
2654 the solutions are:
2655
2656 | x0 = i0 + i1 * t,
2657 | y0 = j0 + j1 * t. */
2658 HOST_WIDE_INT i0, j0, i1, j1;
2659
2660 i0 = U[0][0] * gamma / gcd_alpha_beta;
2661 j0 = U[0][1] * gamma / gcd_alpha_beta;
2662 i1 = U[1][0];
2663 j1 = U[1][1];
2664
2665 if ((i1 == 0 && i0 < 0)
2666 || (j1 == 0 && j0 < 0))
2667 {
2668 /* There is no solution.
2669 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2670 falls in here, but for the moment we don't look at the
2671 upper bound of the iteration domain. */
2672 *overlaps_a = conflict_fn_no_dependence ();
2673 *overlaps_b = conflict_fn_no_dependence ();
2674 *last_conflicts = integer_zero_node;
2675 goto end_analyze_subs_aa;
2676 }
2677
2678 if (i1 > 0 && j1 > 0)
2679 {
2680 HOST_WIDE_INT niter_a
2681 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2682 HOST_WIDE_INT niter_b
2683 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2684 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2685
2686 /* (X0, Y0) is a solution of the Diophantine equation:
2687 "chrec_a (X0) = chrec_b (Y0)". */
2688 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2689 CEIL (-j0, j1));
2690 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2691 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2692
2693 /* (X1, Y1) is the smallest positive solution of the eq
2694 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2695 first conflict occurs. */
2696 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2697 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2698 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2699
2700 if (niter > 0)
2701 {
2702 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2703 FLOOR_DIV (niter - j0, j1));
2704 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2705
2706 /* If the overlap occurs outside of the bounds of the
2707 loop, there is no dependence. */
2708 if (x1 >= niter || y1 >= niter)
2709 {
2710 *overlaps_a = conflict_fn_no_dependence ();
2711 *overlaps_b = conflict_fn_no_dependence ();
2712 *last_conflicts = integer_zero_node;
2713 goto end_analyze_subs_aa;
2714 }
2715 else
2716 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2717 }
2718 else
2719 *last_conflicts = chrec_dont_know;
2720
2721 *overlaps_a
2722 = conflict_fn (1,
2723 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2724 1,
2725 build_int_cst (NULL_TREE, i1)));
2726 *overlaps_b
2727 = conflict_fn (1,
2728 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2729 1,
2730 build_int_cst (NULL_TREE, j1)));
2731 }
2732 else
2733 {
2734 /* FIXME: For the moment, the upper bound of the
2735 iteration domain for i and j is not checked. */
2736 if (dump_file && (dump_flags & TDF_DETAILS))
2737 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2738 *overlaps_a = conflict_fn_not_known ();
2739 *overlaps_b = conflict_fn_not_known ();
2740 *last_conflicts = chrec_dont_know;
2741 }
2742 }
2743 else
2744 {
2745 if (dump_file && (dump_flags & TDF_DETAILS))
2746 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2747 *overlaps_a = conflict_fn_not_known ();
2748 *overlaps_b = conflict_fn_not_known ();
2749 *last_conflicts = chrec_dont_know;
2750 }
2751 }
2752 else
2753 {
2754 if (dump_file && (dump_flags & TDF_DETAILS))
2755 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2756 *overlaps_a = conflict_fn_not_known ();
2757 *overlaps_b = conflict_fn_not_known ();
2758 *last_conflicts = chrec_dont_know;
2759 }
2760
2761 end_analyze_subs_aa:
2762 obstack_free (&scratch_obstack, NULL);
2763 if (dump_file && (dump_flags & TDF_DETAILS))
2764 {
2765 fprintf (dump_file, " (overlaps_a = ");
2766 dump_conflict_function (dump_file, *overlaps_a);
2767 fprintf (dump_file, ")\n (overlaps_b = ");
2768 dump_conflict_function (dump_file, *overlaps_b);
2769 fprintf (dump_file, "))\n");
2770 }
2771 }
2772
2773 /* Returns true when analyze_subscript_affine_affine can be used for
2774 determining the dependence relation between chrec_a and chrec_b,
2775 that contain symbols. This function modifies chrec_a and chrec_b
2776 such that the analysis result is the same, and such that they don't
2777 contain symbols, and then can safely be passed to the analyzer.
2778
2779 Example: The analysis of the following tuples of evolutions produce
2780 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2781 vs. {0, +, 1}_1
2782
2783 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2784 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2785 */
2786
2787 static bool
2788 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2789 {
2790 tree diff, type, left_a, left_b, right_b;
2791
2792 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2793 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2794 /* FIXME: For the moment not handled. Might be refined later. */
2795 return false;
2796
2797 type = chrec_type (*chrec_a);
2798 left_a = CHREC_LEFT (*chrec_a);
2799 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2800 diff = chrec_fold_minus (type, left_a, left_b);
2801
2802 if (!evolution_function_is_constant_p (diff))
2803 return false;
2804
2805 if (dump_file && (dump_flags & TDF_DETAILS))
2806 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2807
2808 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2809 diff, CHREC_RIGHT (*chrec_a));
2810 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2811 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2812 build_int_cst (type, 0),
2813 right_b);
2814 return true;
2815 }
2816
2817 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2818 *OVERLAPS_B are initialized to the functions that describe the
2819 relation between the elements accessed twice by CHREC_A and
2820 CHREC_B. For k >= 0, the following property is verified:
2821
2822 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2823
2824 static void
2825 analyze_siv_subscript (tree chrec_a,
2826 tree chrec_b,
2827 conflict_function **overlaps_a,
2828 conflict_function **overlaps_b,
2829 tree *last_conflicts,
2830 int loop_nest_num)
2831 {
2832 dependence_stats.num_siv++;
2833
2834 if (dump_file && (dump_flags & TDF_DETAILS))
2835 fprintf (dump_file, "(analyze_siv_subscript \n");
2836
2837 if (evolution_function_is_constant_p (chrec_a)
2838 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2839 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2840 overlaps_a, overlaps_b, last_conflicts);
2841
2842 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2843 && evolution_function_is_constant_p (chrec_b))
2844 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2845 overlaps_b, overlaps_a, last_conflicts);
2846
2847 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2848 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2849 {
2850 if (!chrec_contains_symbols (chrec_a)
2851 && !chrec_contains_symbols (chrec_b))
2852 {
2853 analyze_subscript_affine_affine (chrec_a, chrec_b,
2854 overlaps_a, overlaps_b,
2855 last_conflicts);
2856
2857 if (CF_NOT_KNOWN_P (*overlaps_a)
2858 || CF_NOT_KNOWN_P (*overlaps_b))
2859 dependence_stats.num_siv_unimplemented++;
2860 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2861 || CF_NO_DEPENDENCE_P (*overlaps_b))
2862 dependence_stats.num_siv_independent++;
2863 else
2864 dependence_stats.num_siv_dependent++;
2865 }
2866 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2867 &chrec_b))
2868 {
2869 analyze_subscript_affine_affine (chrec_a, chrec_b,
2870 overlaps_a, overlaps_b,
2871 last_conflicts);
2872
2873 if (CF_NOT_KNOWN_P (*overlaps_a)
2874 || CF_NOT_KNOWN_P (*overlaps_b))
2875 dependence_stats.num_siv_unimplemented++;
2876 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2877 || CF_NO_DEPENDENCE_P (*overlaps_b))
2878 dependence_stats.num_siv_independent++;
2879 else
2880 dependence_stats.num_siv_dependent++;
2881 }
2882 else
2883 goto siv_subscript_dontknow;
2884 }
2885
2886 else
2887 {
2888 siv_subscript_dontknow:;
2889 if (dump_file && (dump_flags & TDF_DETAILS))
2890 fprintf (dump_file, " siv test failed: unimplemented");
2891 *overlaps_a = conflict_fn_not_known ();
2892 *overlaps_b = conflict_fn_not_known ();
2893 *last_conflicts = chrec_dont_know;
2894 dependence_stats.num_siv_unimplemented++;
2895 }
2896
2897 if (dump_file && (dump_flags & TDF_DETAILS))
2898 fprintf (dump_file, ")\n");
2899 }
2900
2901 /* Returns false if we can prove that the greatest common divisor of the steps
2902 of CHREC does not divide CST, false otherwise. */
2903
2904 static bool
2905 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2906 {
2907 HOST_WIDE_INT cd = 0, val;
2908 tree step;
2909
2910 if (!tree_fits_shwi_p (cst))
2911 return true;
2912 val = tree_to_shwi (cst);
2913
2914 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2915 {
2916 step = CHREC_RIGHT (chrec);
2917 if (!tree_fits_shwi_p (step))
2918 return true;
2919 cd = gcd (cd, tree_to_shwi (step));
2920 chrec = CHREC_LEFT (chrec);
2921 }
2922
2923 return val % cd == 0;
2924 }
2925
2926 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2927 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2928 functions that describe the relation between the elements accessed
2929 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2930 is verified:
2931
2932 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2933
2934 static void
2935 analyze_miv_subscript (tree chrec_a,
2936 tree chrec_b,
2937 conflict_function **overlaps_a,
2938 conflict_function **overlaps_b,
2939 tree *last_conflicts,
2940 struct loop *loop_nest)
2941 {
2942 tree type, difference;
2943
2944 dependence_stats.num_miv++;
2945 if (dump_file && (dump_flags & TDF_DETAILS))
2946 fprintf (dump_file, "(analyze_miv_subscript \n");
2947
2948 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2949 chrec_a = chrec_convert (type, chrec_a, NULL);
2950 chrec_b = chrec_convert (type, chrec_b, NULL);
2951 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2952
2953 if (eq_evolutions_p (chrec_a, chrec_b))
2954 {
2955 /* Access functions are the same: all the elements are accessed
2956 in the same order. */
2957 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2958 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2959 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2960 dependence_stats.num_miv_dependent++;
2961 }
2962
2963 else if (evolution_function_is_constant_p (difference)
2964 /* For the moment, the following is verified:
2965 evolution_function_is_affine_multivariate_p (chrec_a,
2966 loop_nest->num) */
2967 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2968 {
2969 /* testsuite/.../ssa-chrec-33.c
2970 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2971
2972 The difference is 1, and all the evolution steps are multiples
2973 of 2, consequently there are no overlapping elements. */
2974 *overlaps_a = conflict_fn_no_dependence ();
2975 *overlaps_b = conflict_fn_no_dependence ();
2976 *last_conflicts = integer_zero_node;
2977 dependence_stats.num_miv_independent++;
2978 }
2979
2980 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2981 && !chrec_contains_symbols (chrec_a)
2982 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2983 && !chrec_contains_symbols (chrec_b))
2984 {
2985 /* testsuite/.../ssa-chrec-35.c
2986 {0, +, 1}_2 vs. {0, +, 1}_3
2987 the overlapping elements are respectively located at iterations:
2988 {0, +, 1}_x and {0, +, 1}_x,
2989 in other words, we have the equality:
2990 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2991
2992 Other examples:
2993 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2994 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2995
2996 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2997 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2998 */
2999 analyze_subscript_affine_affine (chrec_a, chrec_b,
3000 overlaps_a, overlaps_b, last_conflicts);
3001
3002 if (CF_NOT_KNOWN_P (*overlaps_a)
3003 || CF_NOT_KNOWN_P (*overlaps_b))
3004 dependence_stats.num_miv_unimplemented++;
3005 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3006 || CF_NO_DEPENDENCE_P (*overlaps_b))
3007 dependence_stats.num_miv_independent++;
3008 else
3009 dependence_stats.num_miv_dependent++;
3010 }
3011
3012 else
3013 {
3014 /* When the analysis is too difficult, answer "don't know". */
3015 if (dump_file && (dump_flags & TDF_DETAILS))
3016 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
3017
3018 *overlaps_a = conflict_fn_not_known ();
3019 *overlaps_b = conflict_fn_not_known ();
3020 *last_conflicts = chrec_dont_know;
3021 dependence_stats.num_miv_unimplemented++;
3022 }
3023
3024 if (dump_file && (dump_flags & TDF_DETAILS))
3025 fprintf (dump_file, ")\n");
3026 }
3027
3028 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
3029 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
3030 OVERLAP_ITERATIONS_B are initialized with two functions that
3031 describe the iterations that contain conflicting elements.
3032
3033 Remark: For an integer k >= 0, the following equality is true:
3034
3035 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3036 */
3037
3038 static void
3039 analyze_overlapping_iterations (tree chrec_a,
3040 tree chrec_b,
3041 conflict_function **overlap_iterations_a,
3042 conflict_function **overlap_iterations_b,
3043 tree *last_conflicts, struct loop *loop_nest)
3044 {
3045 unsigned int lnn = loop_nest->num;
3046
3047 dependence_stats.num_subscript_tests++;
3048
3049 if (dump_file && (dump_flags & TDF_DETAILS))
3050 {
3051 fprintf (dump_file, "(analyze_overlapping_iterations \n");
3052 fprintf (dump_file, " (chrec_a = ");
3053 print_generic_expr (dump_file, chrec_a, 0);
3054 fprintf (dump_file, ")\n (chrec_b = ");
3055 print_generic_expr (dump_file, chrec_b, 0);
3056 fprintf (dump_file, ")\n");
3057 }
3058
3059 if (chrec_a == NULL_TREE
3060 || chrec_b == NULL_TREE
3061 || chrec_contains_undetermined (chrec_a)
3062 || chrec_contains_undetermined (chrec_b))
3063 {
3064 dependence_stats.num_subscript_undetermined++;
3065
3066 *overlap_iterations_a = conflict_fn_not_known ();
3067 *overlap_iterations_b = conflict_fn_not_known ();
3068 }
3069
3070 /* If they are the same chrec, and are affine, they overlap
3071 on every iteration. */
3072 else if (eq_evolutions_p (chrec_a, chrec_b)
3073 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3074 || operand_equal_p (chrec_a, chrec_b, 0)))
3075 {
3076 dependence_stats.num_same_subscript_function++;
3077 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3078 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3079 *last_conflicts = chrec_dont_know;
3080 }
3081
3082 /* If they aren't the same, and aren't affine, we can't do anything
3083 yet. */
3084 else if ((chrec_contains_symbols (chrec_a)
3085 || chrec_contains_symbols (chrec_b))
3086 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3087 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
3088 {
3089 dependence_stats.num_subscript_undetermined++;
3090 *overlap_iterations_a = conflict_fn_not_known ();
3091 *overlap_iterations_b = conflict_fn_not_known ();
3092 }
3093
3094 else if (ziv_subscript_p (chrec_a, chrec_b))
3095 analyze_ziv_subscript (chrec_a, chrec_b,
3096 overlap_iterations_a, overlap_iterations_b,
3097 last_conflicts);
3098
3099 else if (siv_subscript_p (chrec_a, chrec_b))
3100 analyze_siv_subscript (chrec_a, chrec_b,
3101 overlap_iterations_a, overlap_iterations_b,
3102 last_conflicts, lnn);
3103
3104 else
3105 analyze_miv_subscript (chrec_a, chrec_b,
3106 overlap_iterations_a, overlap_iterations_b,
3107 last_conflicts, loop_nest);
3108
3109 if (dump_file && (dump_flags & TDF_DETAILS))
3110 {
3111 fprintf (dump_file, " (overlap_iterations_a = ");
3112 dump_conflict_function (dump_file, *overlap_iterations_a);
3113 fprintf (dump_file, ")\n (overlap_iterations_b = ");
3114 dump_conflict_function (dump_file, *overlap_iterations_b);
3115 fprintf (dump_file, "))\n");
3116 }
3117 }
3118
3119 /* Helper function for uniquely inserting distance vectors. */
3120
3121 static void
3122 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3123 {
3124 unsigned i;
3125 lambda_vector v;
3126
3127 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3128 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3129 return;
3130
3131 DDR_DIST_VECTS (ddr).safe_push (dist_v);
3132 }
3133
3134 /* Helper function for uniquely inserting direction vectors. */
3135
3136 static void
3137 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3138 {
3139 unsigned i;
3140 lambda_vector v;
3141
3142 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3143 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3144 return;
3145
3146 DDR_DIR_VECTS (ddr).safe_push (dir_v);
3147 }
3148
3149 /* Add a distance of 1 on all the loops outer than INDEX. If we
3150 haven't yet determined a distance for this outer loop, push a new
3151 distance vector composed of the previous distance, and a distance
3152 of 1 for this outer loop. Example:
3153
3154 | loop_1
3155 | loop_2
3156 | A[10]
3157 | endloop_2
3158 | endloop_1
3159
3160 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3161 save (0, 1), then we have to save (1, 0). */
3162
3163 static void
3164 add_outer_distances (struct data_dependence_relation *ddr,
3165 lambda_vector dist_v, int index)
3166 {
3167 /* For each outer loop where init_v is not set, the accesses are
3168 in dependence of distance 1 in the loop. */
3169 while (--index >= 0)
3170 {
3171 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3172 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3173 save_v[index] = 1;
3174 save_dist_v (ddr, save_v);
3175 }
3176 }
3177
3178 /* Return false when fail to represent the data dependence as a
3179 distance vector. INIT_B is set to true when a component has been
3180 added to the distance vector DIST_V. INDEX_CARRY is then set to
3181 the index in DIST_V that carries the dependence. */
3182
3183 static bool
3184 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3185 struct data_reference *ddr_a,
3186 struct data_reference *ddr_b,
3187 lambda_vector dist_v, bool *init_b,
3188 int *index_carry)
3189 {
3190 unsigned i;
3191 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3192
3193 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3194 {
3195 tree access_fn_a, access_fn_b;
3196 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3197
3198 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3199 {
3200 non_affine_dependence_relation (ddr);
3201 return false;
3202 }
3203
3204 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3205 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3206
3207 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3208 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3209 {
3210 int dist, index;
3211 int var_a = CHREC_VARIABLE (access_fn_a);
3212 int var_b = CHREC_VARIABLE (access_fn_b);
3213
3214 if (var_a != var_b
3215 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3216 {
3217 non_affine_dependence_relation (ddr);
3218 return false;
3219 }
3220
3221 dist = int_cst_value (SUB_DISTANCE (subscript));
3222 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3223 *index_carry = MIN (index, *index_carry);
3224
3225 /* This is the subscript coupling test. If we have already
3226 recorded a distance for this loop (a distance coming from
3227 another subscript), it should be the same. For example,
3228 in the following code, there is no dependence:
3229
3230 | loop i = 0, N, 1
3231 | T[i+1][i] = ...
3232 | ... = T[i][i]
3233 | endloop
3234 */
3235 if (init_v[index] != 0 && dist_v[index] != dist)
3236 {
3237 finalize_ddr_dependent (ddr, chrec_known);
3238 return false;
3239 }
3240
3241 dist_v[index] = dist;
3242 init_v[index] = 1;
3243 *init_b = true;
3244 }
3245 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3246 {
3247 /* This can be for example an affine vs. constant dependence
3248 (T[i] vs. T[3]) that is not an affine dependence and is
3249 not representable as a distance vector. */
3250 non_affine_dependence_relation (ddr);
3251 return false;
3252 }
3253 }
3254
3255 return true;
3256 }
3257
3258 /* Return true when the DDR contains only constant access functions. */
3259
3260 static bool
3261 constant_access_functions (const struct data_dependence_relation *ddr)
3262 {
3263 unsigned i;
3264
3265 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3266 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3267 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3268 return false;
3269
3270 return true;
3271 }
3272
3273 /* Helper function for the case where DDR_A and DDR_B are the same
3274 multivariate access function with a constant step. For an example
3275 see pr34635-1.c. */
3276
3277 static void
3278 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3279 {
3280 int x_1, x_2;
3281 tree c_1 = CHREC_LEFT (c_2);
3282 tree c_0 = CHREC_LEFT (c_1);
3283 lambda_vector dist_v;
3284 int v1, v2, cd;
3285
3286 /* Polynomials with more than 2 variables are not handled yet. When
3287 the evolution steps are parameters, it is not possible to
3288 represent the dependence using classical distance vectors. */
3289 if (TREE_CODE (c_0) != INTEGER_CST
3290 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3291 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3292 {
3293 DDR_AFFINE_P (ddr) = false;
3294 return;
3295 }
3296
3297 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3298 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3299
3300 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3301 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3302 v1 = int_cst_value (CHREC_RIGHT (c_1));
3303 v2 = int_cst_value (CHREC_RIGHT (c_2));
3304 cd = gcd (v1, v2);
3305 v1 /= cd;
3306 v2 /= cd;
3307
3308 if (v2 < 0)
3309 {
3310 v2 = -v2;
3311 v1 = -v1;
3312 }
3313
3314 dist_v[x_1] = v2;
3315 dist_v[x_2] = -v1;
3316 save_dist_v (ddr, dist_v);
3317
3318 add_outer_distances (ddr, dist_v, x_1);
3319 }
3320
3321 /* Helper function for the case where DDR_A and DDR_B are the same
3322 access functions. */
3323
3324 static void
3325 add_other_self_distances (struct data_dependence_relation *ddr)
3326 {
3327 lambda_vector dist_v;
3328 unsigned i;
3329 int index_carry = DDR_NB_LOOPS (ddr);
3330
3331 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3332 {
3333 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3334
3335 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3336 {
3337 if (!evolution_function_is_univariate_p (access_fun))
3338 {
3339 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3340 {
3341 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3342 return;
3343 }
3344
3345 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3346
3347 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3348 add_multivariate_self_dist (ddr, access_fun);
3349 else
3350 /* The evolution step is not constant: it varies in
3351 the outer loop, so this cannot be represented by a
3352 distance vector. For example in pr34635.c the
3353 evolution is {0, +, {0, +, 4}_1}_2. */
3354 DDR_AFFINE_P (ddr) = false;
3355
3356 return;
3357 }
3358
3359 index_carry = MIN (index_carry,
3360 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3361 DDR_LOOP_NEST (ddr)));
3362 }
3363 }
3364
3365 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3366 add_outer_distances (ddr, dist_v, index_carry);
3367 }
3368
3369 static void
3370 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3371 {
3372 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3373
3374 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3375 save_dist_v (ddr, dist_v);
3376 }
3377
3378 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3379 is the case for example when access functions are the same and
3380 equal to a constant, as in:
3381
3382 | loop_1
3383 | A[3] = ...
3384 | ... = A[3]
3385 | endloop_1
3386
3387 in which case the distance vectors are (0) and (1). */
3388
3389 static void
3390 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3391 {
3392 unsigned i, j;
3393
3394 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3395 {
3396 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3397 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3398 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3399
3400 for (j = 0; j < ca->n; j++)
3401 if (affine_function_zero_p (ca->fns[j]))
3402 {
3403 insert_innermost_unit_dist_vector (ddr);
3404 return;
3405 }
3406
3407 for (j = 0; j < cb->n; j++)
3408 if (affine_function_zero_p (cb->fns[j]))
3409 {
3410 insert_innermost_unit_dist_vector (ddr);
3411 return;
3412 }
3413 }
3414 }
3415
3416 /* Compute the classic per loop distance vector. DDR is the data
3417 dependence relation to build a vector from. Return false when fail
3418 to represent the data dependence as a distance vector. */
3419
3420 static bool
3421 build_classic_dist_vector (struct data_dependence_relation *ddr,
3422 struct loop *loop_nest)
3423 {
3424 bool init_b = false;
3425 int index_carry = DDR_NB_LOOPS (ddr);
3426 lambda_vector dist_v;
3427
3428 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3429 return false;
3430
3431 if (same_access_functions (ddr))
3432 {
3433 /* Save the 0 vector. */
3434 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3435 save_dist_v (ddr, dist_v);
3436
3437 if (constant_access_functions (ddr))
3438 add_distance_for_zero_overlaps (ddr);
3439
3440 if (DDR_NB_LOOPS (ddr) > 1)
3441 add_other_self_distances (ddr);
3442
3443 return true;
3444 }
3445
3446 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3447 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3448 dist_v, &init_b, &index_carry))
3449 return false;
3450
3451 /* Save the distance vector if we initialized one. */
3452 if (init_b)
3453 {
3454 /* Verify a basic constraint: classic distance vectors should
3455 always be lexicographically positive.
3456
3457 Data references are collected in the order of execution of
3458 the program, thus for the following loop
3459
3460 | for (i = 1; i < 100; i++)
3461 | for (j = 1; j < 100; j++)
3462 | {
3463 | t = T[j+1][i-1]; // A
3464 | T[j][i] = t + 2; // B
3465 | }
3466
3467 references are collected following the direction of the wind:
3468 A then B. The data dependence tests are performed also
3469 following this order, such that we're looking at the distance
3470 separating the elements accessed by A from the elements later
3471 accessed by B. But in this example, the distance returned by
3472 test_dep (A, B) is lexicographically negative (-1, 1), that
3473 means that the access A occurs later than B with respect to
3474 the outer loop, ie. we're actually looking upwind. In this
3475 case we solve test_dep (B, A) looking downwind to the
3476 lexicographically positive solution, that returns the
3477 distance vector (1, -1). */
3478 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3479 {
3480 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3481 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3482 loop_nest))
3483 return false;
3484 compute_subscript_distance (ddr);
3485 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3486 save_v, &init_b, &index_carry))
3487 return false;
3488 save_dist_v (ddr, save_v);
3489 DDR_REVERSED_P (ddr) = true;
3490
3491 /* In this case there is a dependence forward for all the
3492 outer loops:
3493
3494 | for (k = 1; k < 100; k++)
3495 | for (i = 1; i < 100; i++)
3496 | for (j = 1; j < 100; j++)
3497 | {
3498 | t = T[j+1][i-1]; // A
3499 | T[j][i] = t + 2; // B
3500 | }
3501
3502 the vectors are:
3503 (0, 1, -1)
3504 (1, 1, -1)
3505 (1, -1, 1)
3506 */
3507 if (DDR_NB_LOOPS (ddr) > 1)
3508 {
3509 add_outer_distances (ddr, save_v, index_carry);
3510 add_outer_distances (ddr, dist_v, index_carry);
3511 }
3512 }
3513 else
3514 {
3515 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3516 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3517
3518 if (DDR_NB_LOOPS (ddr) > 1)
3519 {
3520 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3521
3522 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3523 DDR_A (ddr), loop_nest))
3524 return false;
3525 compute_subscript_distance (ddr);
3526 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3527 opposite_v, &init_b,
3528 &index_carry))
3529 return false;
3530
3531 save_dist_v (ddr, save_v);
3532 add_outer_distances (ddr, dist_v, index_carry);
3533 add_outer_distances (ddr, opposite_v, index_carry);
3534 }
3535 else
3536 save_dist_v (ddr, save_v);
3537 }
3538 }
3539 else
3540 {
3541 /* There is a distance of 1 on all the outer loops: Example:
3542 there is a dependence of distance 1 on loop_1 for the array A.
3543
3544 | loop_1
3545 | A[5] = ...
3546 | endloop
3547 */
3548 add_outer_distances (ddr, dist_v,
3549 lambda_vector_first_nz (dist_v,
3550 DDR_NB_LOOPS (ddr), 0));
3551 }
3552
3553 if (dump_file && (dump_flags & TDF_DETAILS))
3554 {
3555 unsigned i;
3556
3557 fprintf (dump_file, "(build_classic_dist_vector\n");
3558 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3559 {
3560 fprintf (dump_file, " dist_vector = (");
3561 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3562 DDR_NB_LOOPS (ddr));
3563 fprintf (dump_file, " )\n");
3564 }
3565 fprintf (dump_file, ")\n");
3566 }
3567
3568 return true;
3569 }
3570
3571 /* Return the direction for a given distance.
3572 FIXME: Computing dir this way is suboptimal, since dir can catch
3573 cases that dist is unable to represent. */
3574
3575 static inline enum data_dependence_direction
3576 dir_from_dist (int dist)
3577 {
3578 if (dist > 0)
3579 return dir_positive;
3580 else if (dist < 0)
3581 return dir_negative;
3582 else
3583 return dir_equal;
3584 }
3585
3586 /* Compute the classic per loop direction vector. DDR is the data
3587 dependence relation to build a vector from. */
3588
3589 static void
3590 build_classic_dir_vector (struct data_dependence_relation *ddr)
3591 {
3592 unsigned i, j;
3593 lambda_vector dist_v;
3594
3595 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3596 {
3597 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3598
3599 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3600 dir_v[j] = dir_from_dist (dist_v[j]);
3601
3602 save_dir_v (ddr, dir_v);
3603 }
3604 }
3605
3606 /* Helper function. Returns true when there is a dependence between
3607 data references DRA and DRB. */
3608
3609 static bool
3610 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3611 struct data_reference *dra,
3612 struct data_reference *drb,
3613 struct loop *loop_nest)
3614 {
3615 unsigned int i;
3616 tree last_conflicts;
3617 struct subscript *subscript;
3618 tree res = NULL_TREE;
3619
3620 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
3621 {
3622 conflict_function *overlaps_a, *overlaps_b;
3623
3624 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3625 DR_ACCESS_FN (drb, i),
3626 &overlaps_a, &overlaps_b,
3627 &last_conflicts, loop_nest);
3628
3629 if (SUB_CONFLICTS_IN_A (subscript))
3630 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3631 if (SUB_CONFLICTS_IN_B (subscript))
3632 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3633
3634 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3635 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3636 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3637
3638 /* If there is any undetermined conflict function we have to
3639 give a conservative answer in case we cannot prove that
3640 no dependence exists when analyzing another subscript. */
3641 if (CF_NOT_KNOWN_P (overlaps_a)
3642 || CF_NOT_KNOWN_P (overlaps_b))
3643 {
3644 res = chrec_dont_know;
3645 continue;
3646 }
3647
3648 /* When there is a subscript with no dependence we can stop. */
3649 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3650 || CF_NO_DEPENDENCE_P (overlaps_b))
3651 {
3652 res = chrec_known;
3653 break;
3654 }
3655 }
3656
3657 if (res == NULL_TREE)
3658 return true;
3659
3660 if (res == chrec_known)
3661 dependence_stats.num_dependence_independent++;
3662 else
3663 dependence_stats.num_dependence_undetermined++;
3664 finalize_ddr_dependent (ddr, res);
3665 return false;
3666 }
3667
3668 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3669
3670 static void
3671 subscript_dependence_tester (struct data_dependence_relation *ddr,
3672 struct loop *loop_nest)
3673 {
3674 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3675 dependence_stats.num_dependence_dependent++;
3676
3677 compute_subscript_distance (ddr);
3678 if (build_classic_dist_vector (ddr, loop_nest))
3679 build_classic_dir_vector (ddr);
3680 }
3681
3682 /* Returns true when all the access functions of A are affine or
3683 constant with respect to LOOP_NEST. */
3684
3685 static bool
3686 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3687 const struct loop *loop_nest)
3688 {
3689 unsigned int i;
3690 vec<tree> fns = DR_ACCESS_FNS (a);
3691 tree t;
3692
3693 FOR_EACH_VEC_ELT (fns, i, t)
3694 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3695 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3696 return false;
3697
3698 return true;
3699 }
3700
3701 /* Initializes an equation for an OMEGA problem using the information
3702 contained in the ACCESS_FUN. Returns true when the operation
3703 succeeded.
3704
3705 PB is the omega constraint system.
3706 EQ is the number of the equation to be initialized.
3707 OFFSET is used for shifting the variables names in the constraints:
3708 a constrain is composed of 2 * the number of variables surrounding
3709 dependence accesses. OFFSET is set either to 0 for the first n variables,
3710 then it is set to n.
3711 ACCESS_FUN is expected to be an affine chrec. */
3712
3713 static bool
3714 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3715 unsigned int offset, tree access_fun,
3716 struct data_dependence_relation *ddr)
3717 {
3718 switch (TREE_CODE (access_fun))
3719 {
3720 case POLYNOMIAL_CHREC:
3721 {
3722 tree left = CHREC_LEFT (access_fun);
3723 tree right = CHREC_RIGHT (access_fun);
3724 int var = CHREC_VARIABLE (access_fun);
3725 unsigned var_idx;
3726
3727 if (TREE_CODE (right) != INTEGER_CST)
3728 return false;
3729
3730 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3731 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3732
3733 /* Compute the innermost loop index. */
3734 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3735
3736 if (offset == 0)
3737 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3738 += int_cst_value (right);
3739
3740 switch (TREE_CODE (left))
3741 {
3742 case POLYNOMIAL_CHREC:
3743 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3744
3745 case INTEGER_CST:
3746 pb->eqs[eq].coef[0] += int_cst_value (left);
3747 return true;
3748
3749 default:
3750 return false;
3751 }
3752 }
3753
3754 case INTEGER_CST:
3755 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3756 return true;
3757
3758 default:
3759 return false;
3760 }
3761 }
3762
3763 /* As explained in the comments preceding init_omega_for_ddr, we have
3764 to set up a system for each loop level, setting outer loops
3765 variation to zero, and current loop variation to positive or zero.
3766 Save each lexico positive distance vector. */
3767
3768 static void
3769 omega_extract_distance_vectors (omega_pb pb,
3770 struct data_dependence_relation *ddr)
3771 {
3772 int eq, geq;
3773 unsigned i, j;
3774 struct loop *loopi, *loopj;
3775 enum omega_result res;
3776
3777 /* Set a new problem for each loop in the nest. The basis is the
3778 problem that we have initialized until now. On top of this we
3779 add new constraints. */
3780 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3781 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3782 {
3783 int dist = 0;
3784 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3785 DDR_NB_LOOPS (ddr));
3786
3787 omega_copy_problem (copy, pb);
3788
3789 /* For all the outer loops "loop_j", add "dj = 0". */
3790 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3791 {
3792 eq = omega_add_zero_eq (copy, omega_black);
3793 copy->eqs[eq].coef[j + 1] = 1;
3794 }
3795
3796 /* For "loop_i", add "0 <= di". */
3797 geq = omega_add_zero_geq (copy, omega_black);
3798 copy->geqs[geq].coef[i + 1] = 1;
3799
3800 /* Reduce the constraint system, and test that the current
3801 problem is feasible. */
3802 res = omega_simplify_problem (copy);
3803 if (res == omega_false
3804 || res == omega_unknown
3805 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3806 goto next_problem;
3807
3808 for (eq = 0; eq < copy->num_subs; eq++)
3809 if (copy->subs[eq].key == (int) i + 1)
3810 {
3811 dist = copy->subs[eq].coef[0];
3812 goto found_dist;
3813 }
3814
3815 if (dist == 0)
3816 {
3817 /* Reinitialize problem... */
3818 omega_copy_problem (copy, pb);
3819 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3820 {
3821 eq = omega_add_zero_eq (copy, omega_black);
3822 copy->eqs[eq].coef[j + 1] = 1;
3823 }
3824
3825 /* ..., but this time "di = 1". */
3826 eq = omega_add_zero_eq (copy, omega_black);
3827 copy->eqs[eq].coef[i + 1] = 1;
3828 copy->eqs[eq].coef[0] = -1;
3829
3830 res = omega_simplify_problem (copy);
3831 if (res == omega_false
3832 || res == omega_unknown
3833 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3834 goto next_problem;
3835
3836 for (eq = 0; eq < copy->num_subs; eq++)
3837 if (copy->subs[eq].key == (int) i + 1)
3838 {
3839 dist = copy->subs[eq].coef[0];
3840 goto found_dist;
3841 }
3842 }
3843
3844 found_dist:;
3845 /* Save the lexicographically positive distance vector. */
3846 if (dist >= 0)
3847 {
3848 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3849 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3850
3851 dist_v[i] = dist;
3852
3853 for (eq = 0; eq < copy->num_subs; eq++)
3854 if (copy->subs[eq].key > 0)
3855 {
3856 dist = copy->subs[eq].coef[0];
3857 dist_v[copy->subs[eq].key - 1] = dist;
3858 }
3859
3860 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3861 dir_v[j] = dir_from_dist (dist_v[j]);
3862
3863 save_dist_v (ddr, dist_v);
3864 save_dir_v (ddr, dir_v);
3865 }
3866
3867 next_problem:;
3868 omega_free_problem (copy);
3869 }
3870 }
3871
3872 /* This is called for each subscript of a tuple of data references:
3873 insert an equality for representing the conflicts. */
3874
3875 static bool
3876 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3877 struct data_dependence_relation *ddr,
3878 omega_pb pb, bool *maybe_dependent)
3879 {
3880 int eq;
3881 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3882 TREE_TYPE (access_fun_b));
3883 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3884 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3885 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3886 tree minus_one;
3887
3888 /* When the fun_a - fun_b is not constant, the dependence is not
3889 captured by the classic distance vector representation. */
3890 if (TREE_CODE (difference) != INTEGER_CST)
3891 return false;
3892
3893 /* ZIV test. */
3894 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3895 {
3896 /* There is no dependence. */
3897 *maybe_dependent = false;
3898 return true;
3899 }
3900
3901 minus_one = build_int_cst (type, -1);
3902 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3903
3904 eq = omega_add_zero_eq (pb, omega_black);
3905 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3906 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3907 /* There is probably a dependence, but the system of
3908 constraints cannot be built: answer "don't know". */
3909 return false;
3910
3911 /* GCD test. */
3912 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3913 && !int_divides_p (lambda_vector_gcd
3914 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3915 2 * DDR_NB_LOOPS (ddr)),
3916 pb->eqs[eq].coef[0]))
3917 {
3918 /* There is no dependence. */
3919 *maybe_dependent = false;
3920 return true;
3921 }
3922
3923 return true;
3924 }
3925
3926 /* Helper function, same as init_omega_for_ddr but specialized for
3927 data references A and B. */
3928
3929 static bool
3930 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3931 struct data_dependence_relation *ddr,
3932 omega_pb pb, bool *maybe_dependent)
3933 {
3934 unsigned i;
3935 int ineq;
3936 struct loop *loopi;
3937 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3938
3939 /* Insert an equality per subscript. */
3940 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3941 {
3942 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3943 ddr, pb, maybe_dependent))
3944 return false;
3945 else if (*maybe_dependent == false)
3946 {
3947 /* There is no dependence. */
3948 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3949 return true;
3950 }
3951 }
3952
3953 /* Insert inequalities: constraints corresponding to the iteration
3954 domain, i.e. the loops surrounding the references "loop_x" and
3955 the distance variables "dx". The layout of the OMEGA
3956 representation is as follows:
3957 - coef[0] is the constant
3958 - coef[1..nb_loops] are the protected variables that will not be
3959 removed by the solver: the "dx"
3960 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3961 */
3962 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3963 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3964 {
3965 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3966
3967 /* 0 <= loop_x */
3968 ineq = omega_add_zero_geq (pb, omega_black);
3969 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3970
3971 /* 0 <= loop_x + dx */
3972 ineq = omega_add_zero_geq (pb, omega_black);
3973 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3974 pb->geqs[ineq].coef[i + 1] = 1;
3975
3976 if (nbi != -1)
3977 {
3978 /* loop_x <= nb_iters */
3979 ineq = omega_add_zero_geq (pb, omega_black);
3980 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3981 pb->geqs[ineq].coef[0] = nbi;
3982
3983 /* loop_x + dx <= nb_iters */
3984 ineq = omega_add_zero_geq (pb, omega_black);
3985 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3986 pb->geqs[ineq].coef[i + 1] = -1;
3987 pb->geqs[ineq].coef[0] = nbi;
3988
3989 /* A step "dx" bigger than nb_iters is not feasible, so
3990 add "0 <= nb_iters + dx", */
3991 ineq = omega_add_zero_geq (pb, omega_black);
3992 pb->geqs[ineq].coef[i + 1] = 1;
3993 pb->geqs[ineq].coef[0] = nbi;
3994 /* and "dx <= nb_iters". */
3995 ineq = omega_add_zero_geq (pb, omega_black);
3996 pb->geqs[ineq].coef[i + 1] = -1;
3997 pb->geqs[ineq].coef[0] = nbi;
3998 }
3999 }
4000
4001 omega_extract_distance_vectors (pb, ddr);
4002
4003 return true;
4004 }
4005
4006 /* Sets up the Omega dependence problem for the data dependence
4007 relation DDR. Returns false when the constraint system cannot be
4008 built, ie. when the test answers "don't know". Returns true
4009 otherwise, and when independence has been proved (using one of the
4010 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
4011 set MAYBE_DEPENDENT to true.
4012
4013 Example: for setting up the dependence system corresponding to the
4014 conflicting accesses
4015
4016 | loop_i
4017 | loop_j
4018 | A[i, i+1] = ...
4019 | ... A[2*j, 2*(i + j)]
4020 | endloop_j
4021 | endloop_i
4022
4023 the following constraints come from the iteration domain:
4024
4025 0 <= i <= Ni
4026 0 <= i + di <= Ni
4027 0 <= j <= Nj
4028 0 <= j + dj <= Nj
4029
4030 where di, dj are the distance variables. The constraints
4031 representing the conflicting elements are:
4032
4033 i = 2 * (j + dj)
4034 i + 1 = 2 * (i + di + j + dj)
4035
4036 For asking that the resulting distance vector (di, dj) be
4037 lexicographically positive, we insert the constraint "di >= 0". If
4038 "di = 0" in the solution, we fix that component to zero, and we
4039 look at the inner loops: we set a new problem where all the outer
4040 loop distances are zero, and fix this inner component to be
4041 positive. When one of the components is positive, we save that
4042 distance, and set a new problem where the distance on this loop is
4043 zero, searching for other distances in the inner loops. Here is
4044 the classic example that illustrates that we have to set for each
4045 inner loop a new problem:
4046
4047 | loop_1
4048 | loop_2
4049 | A[10]
4050 | endloop_2
4051 | endloop_1
4052
4053 we have to save two distances (1, 0) and (0, 1).
4054
4055 Given two array references, refA and refB, we have to set the
4056 dependence problem twice, refA vs. refB and refB vs. refA, and we
4057 cannot do a single test, as refB might occur before refA in the
4058 inner loops, and the contrary when considering outer loops: ex.
4059
4060 | loop_0
4061 | loop_1
4062 | loop_2
4063 | T[{1,+,1}_2][{1,+,1}_1] // refA
4064 | T[{2,+,1}_2][{0,+,1}_1] // refB
4065 | endloop_2
4066 | endloop_1
4067 | endloop_0
4068
4069 refB touches the elements in T before refA, and thus for the same
4070 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
4071 but for successive loop_0 iterations, we have (1, -1, 1)
4072
4073 The Omega solver expects the distance variables ("di" in the
4074 previous example) to come first in the constraint system (as
4075 variables to be protected, or "safe" variables), the constraint
4076 system is built using the following layout:
4077
4078 "cst | distance vars | index vars".
4079 */
4080
4081 static bool
4082 init_omega_for_ddr (struct data_dependence_relation *ddr,
4083 bool *maybe_dependent)
4084 {
4085 omega_pb pb;
4086 bool res = false;
4087
4088 *maybe_dependent = true;
4089
4090 if (same_access_functions (ddr))
4091 {
4092 unsigned j;
4093 lambda_vector dir_v;
4094
4095 /* Save the 0 vector. */
4096 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4097 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4098 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4099 dir_v[j] = dir_equal;
4100 save_dir_v (ddr, dir_v);
4101
4102 /* Save the dependences carried by outer loops. */
4103 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4104 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4105 maybe_dependent);
4106 omega_free_problem (pb);
4107 return res;
4108 }
4109
4110 /* Omega expects the protected variables (those that have to be kept
4111 after elimination) to appear first in the constraint system.
4112 These variables are the distance variables. In the following
4113 initialization we declare NB_LOOPS safe variables, and the total
4114 number of variables for the constraint system is 2*NB_LOOPS. */
4115 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4116 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4117 maybe_dependent);
4118 omega_free_problem (pb);
4119
4120 /* Stop computation if not decidable, or no dependence. */
4121 if (res == false || *maybe_dependent == false)
4122 return res;
4123
4124 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4125 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4126 maybe_dependent);
4127 omega_free_problem (pb);
4128
4129 return res;
4130 }
4131
4132 /* Return true when DDR contains the same information as that stored
4133 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4134
4135 static bool
4136 ddr_consistent_p (FILE *file,
4137 struct data_dependence_relation *ddr,
4138 vec<lambda_vector> dist_vects,
4139 vec<lambda_vector> dir_vects)
4140 {
4141 unsigned int i, j;
4142
4143 /* If dump_file is set, output there. */
4144 if (dump_file && (dump_flags & TDF_DETAILS))
4145 file = dump_file;
4146
4147 if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr))
4148 {
4149 lambda_vector b_dist_v;
4150 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4151 dist_vects.length (),
4152 DDR_NUM_DIST_VECTS (ddr));
4153
4154 fprintf (file, "Banerjee dist vectors:\n");
4155 FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v)
4156 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4157
4158 fprintf (file, "Omega dist vectors:\n");
4159 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4160 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4161
4162 fprintf (file, "data dependence relation:\n");
4163 dump_data_dependence_relation (file, ddr);
4164
4165 fprintf (file, ")\n");
4166 return false;
4167 }
4168
4169 if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr))
4170 {
4171 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4172 dir_vects.length (),
4173 DDR_NUM_DIR_VECTS (ddr));
4174 return false;
4175 }
4176
4177 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4178 {
4179 lambda_vector a_dist_v;
4180 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4181
4182 /* Distance vectors are not ordered in the same way in the DDR
4183 and in the DIST_VECTS: search for a matching vector. */
4184 FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v)
4185 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4186 break;
4187
4188 if (j == dist_vects.length ())
4189 {
4190 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4191 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4192 fprintf (file, "not found in Omega dist vectors:\n");
4193 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4194 fprintf (file, "data dependence relation:\n");
4195 dump_data_dependence_relation (file, ddr);
4196 fprintf (file, ")\n");
4197 }
4198 }
4199
4200 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4201 {
4202 lambda_vector a_dir_v;
4203 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4204
4205 /* Direction vectors are not ordered in the same way in the DDR
4206 and in the DIR_VECTS: search for a matching vector. */
4207 FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v)
4208 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4209 break;
4210
4211 if (j == dist_vects.length ())
4212 {
4213 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4214 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4215 fprintf (file, "not found in Omega dir vectors:\n");
4216 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4217 fprintf (file, "data dependence relation:\n");
4218 dump_data_dependence_relation (file, ddr);
4219 fprintf (file, ")\n");
4220 }
4221 }
4222
4223 return true;
4224 }
4225
4226 /* This computes the affine dependence relation between A and B with
4227 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4228 independence between two accesses, while CHREC_DONT_KNOW is used
4229 for representing the unknown relation.
4230
4231 Note that it is possible to stop the computation of the dependence
4232 relation the first time we detect a CHREC_KNOWN element for a given
4233 subscript. */
4234
4235 void
4236 compute_affine_dependence (struct data_dependence_relation *ddr,
4237 struct loop *loop_nest)
4238 {
4239 struct data_reference *dra = DDR_A (ddr);
4240 struct data_reference *drb = DDR_B (ddr);
4241
4242 if (dump_file && (dump_flags & TDF_DETAILS))
4243 {
4244 fprintf (dump_file, "(compute_affine_dependence\n");
4245 fprintf (dump_file, " stmt_a: ");
4246 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4247 fprintf (dump_file, " stmt_b: ");
4248 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4249 }
4250
4251 /* Analyze only when the dependence relation is not yet known. */
4252 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4253 {
4254 dependence_stats.num_dependence_tests++;
4255
4256 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4257 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4258 {
4259 subscript_dependence_tester (ddr, loop_nest);
4260
4261 if (flag_check_data_deps)
4262 {
4263 /* Dump the dependences from the first algorithm. */
4264 if (dump_file && (dump_flags & TDF_DETAILS))
4265 {
4266 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4267 dump_data_dependence_relation (dump_file, ddr);
4268 }
4269
4270 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4271 {
4272 bool maybe_dependent;
4273 vec<lambda_vector> dir_vects, dist_vects;
4274
4275 /* Save the result of the first DD analyzer. */
4276 dist_vects = DDR_DIST_VECTS (ddr);
4277 dir_vects = DDR_DIR_VECTS (ddr);
4278
4279 /* Reset the information. */
4280 DDR_DIST_VECTS (ddr).create (0);
4281 DDR_DIR_VECTS (ddr).create (0);
4282
4283 /* Compute the same information using Omega. */
4284 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4285 goto csys_dont_know;
4286
4287 if (dump_file && (dump_flags & TDF_DETAILS))
4288 {
4289 fprintf (dump_file, "Omega Analyzer\n");
4290 dump_data_dependence_relation (dump_file, ddr);
4291 }
4292
4293 /* Check that we get the same information. */
4294 if (maybe_dependent)
4295 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4296 dir_vects));
4297 }
4298 }
4299 }
4300
4301 /* As a last case, if the dependence cannot be determined, or if
4302 the dependence is considered too difficult to determine, answer
4303 "don't know". */
4304 else
4305 {
4306 csys_dont_know:;
4307 dependence_stats.num_dependence_undetermined++;
4308
4309 if (dump_file && (dump_flags & TDF_DETAILS))
4310 {
4311 fprintf (dump_file, "Data ref a:\n");
4312 dump_data_reference (dump_file, dra);
4313 fprintf (dump_file, "Data ref b:\n");
4314 dump_data_reference (dump_file, drb);
4315 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4316 }
4317 finalize_ddr_dependent (ddr, chrec_dont_know);
4318 }
4319 }
4320
4321 if (dump_file && (dump_flags & TDF_DETAILS))
4322 {
4323 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4324 fprintf (dump_file, ") -> no dependence\n");
4325 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4326 fprintf (dump_file, ") -> dependence analysis failed\n");
4327 else
4328 fprintf (dump_file, ")\n");
4329 }
4330 }
4331
4332 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4333 the data references in DATAREFS, in the LOOP_NEST. When
4334 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4335 relations. Return true when successful, i.e. data references number
4336 is small enough to be handled. */
4337
4338 bool
4339 compute_all_dependences (vec<data_reference_p> datarefs,
4340 vec<ddr_p> *dependence_relations,
4341 vec<loop_p> loop_nest,
4342 bool compute_self_and_rr)
4343 {
4344 struct data_dependence_relation *ddr;
4345 struct data_reference *a, *b;
4346 unsigned int i, j;
4347
4348 if ((int) datarefs.length ()
4349 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4350 {
4351 struct data_dependence_relation *ddr;
4352
4353 /* Insert a single relation into dependence_relations:
4354 chrec_dont_know. */
4355 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4356 dependence_relations->safe_push (ddr);
4357 return false;
4358 }
4359
4360 FOR_EACH_VEC_ELT (datarefs, i, a)
4361 for (j = i + 1; datarefs.iterate (j, &b); j++)
4362 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4363 {
4364 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4365 dependence_relations->safe_push (ddr);
4366 if (loop_nest.exists ())
4367 compute_affine_dependence (ddr, loop_nest[0]);
4368 }
4369
4370 if (compute_self_and_rr)
4371 FOR_EACH_VEC_ELT (datarefs, i, a)
4372 {
4373 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4374 dependence_relations->safe_push (ddr);
4375 if (loop_nest.exists ())
4376 compute_affine_dependence (ddr, loop_nest[0]);
4377 }
4378
4379 return true;
4380 }
4381
4382 /* Describes a location of a memory reference. */
4383
4384 typedef struct data_ref_loc_d
4385 {
4386 /* The memory reference. */
4387 tree ref;
4388
4389 /* True if the memory reference is read. */
4390 bool is_read;
4391 } data_ref_loc;
4392
4393
4394 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4395 true if STMT clobbers memory, false otherwise. */
4396
4397 static bool
4398 get_references_in_stmt (gimple stmt, vec<data_ref_loc, va_heap> *references)
4399 {
4400 bool clobbers_memory = false;
4401 data_ref_loc ref;
4402 tree op0, op1;
4403 enum gimple_code stmt_code = gimple_code (stmt);
4404
4405 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4406 As we cannot model data-references to not spelled out
4407 accesses give up if they may occur. */
4408 if (stmt_code == GIMPLE_CALL
4409 && !(gimple_call_flags (stmt) & ECF_CONST))
4410 {
4411 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4412 if (gimple_call_internal_p (stmt))
4413 switch (gimple_call_internal_fn (stmt))
4414 {
4415 case IFN_GOMP_SIMD_LANE:
4416 {
4417 struct loop *loop = gimple_bb (stmt)->loop_father;
4418 tree uid = gimple_call_arg (stmt, 0);
4419 gcc_assert (TREE_CODE (uid) == SSA_NAME);
4420 if (loop == NULL
4421 || loop->simduid != SSA_NAME_VAR (uid))
4422 clobbers_memory = true;
4423 break;
4424 }
4425 case IFN_MASK_LOAD:
4426 case IFN_MASK_STORE:
4427 break;
4428 default:
4429 clobbers_memory = true;
4430 break;
4431 }
4432 else
4433 clobbers_memory = true;
4434 }
4435 else if (stmt_code == GIMPLE_ASM
4436 && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4437 || gimple_vuse (stmt)))
4438 clobbers_memory = true;
4439
4440 if (!gimple_vuse (stmt))
4441 return clobbers_memory;
4442
4443 if (stmt_code == GIMPLE_ASSIGN)
4444 {
4445 tree base;
4446 op0 = gimple_assign_lhs (stmt);
4447 op1 = gimple_assign_rhs1 (stmt);
4448
4449 if (DECL_P (op1)
4450 || (REFERENCE_CLASS_P (op1)
4451 && (base = get_base_address (op1))
4452 && TREE_CODE (base) != SSA_NAME))
4453 {
4454 ref.ref = op1;
4455 ref.is_read = true;
4456 references->safe_push (ref);
4457 }
4458 }
4459 else if (stmt_code == GIMPLE_CALL)
4460 {
4461 unsigned i, n;
4462
4463 ref.is_read = false;
4464 if (gimple_call_internal_p (stmt))
4465 switch (gimple_call_internal_fn (stmt))
4466 {
4467 case IFN_MASK_LOAD:
4468 if (gimple_call_lhs (stmt) == NULL_TREE)
4469 break;
4470 ref.is_read = true;
4471 case IFN_MASK_STORE:
4472 ref.ref = fold_build2 (MEM_REF,
4473 ref.is_read
4474 ? TREE_TYPE (gimple_call_lhs (stmt))
4475 : TREE_TYPE (gimple_call_arg (stmt, 3)),
4476 gimple_call_arg (stmt, 0),
4477 gimple_call_arg (stmt, 1));
4478 references->safe_push (ref);
4479 return false;
4480 default:
4481 break;
4482 }
4483
4484 op0 = gimple_call_lhs (stmt);
4485 n = gimple_call_num_args (stmt);
4486 for (i = 0; i < n; i++)
4487 {
4488 op1 = gimple_call_arg (stmt, i);
4489
4490 if (DECL_P (op1)
4491 || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
4492 {
4493 ref.ref = op1;
4494 ref.is_read = true;
4495 references->safe_push (ref);
4496 }
4497 }
4498 }
4499 else
4500 return clobbers_memory;
4501
4502 if (op0
4503 && (DECL_P (op0)
4504 || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
4505 {
4506 ref.ref = op0;
4507 ref.is_read = false;
4508 references->safe_push (ref);
4509 }
4510 return clobbers_memory;
4511 }
4512
4513 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4514 reference, returns false, otherwise returns true. NEST is the outermost
4515 loop of the loop nest in which the references should be analyzed. */
4516
4517 bool
4518 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4519 vec<data_reference_p> *datarefs)
4520 {
4521 unsigned i;
4522 auto_vec<data_ref_loc, 2> references;
4523 data_ref_loc *ref;
4524 bool ret = true;
4525 data_reference_p dr;
4526
4527 if (get_references_in_stmt (stmt, &references))
4528 return false;
4529
4530 FOR_EACH_VEC_ELT (references, i, ref)
4531 {
4532 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4533 ref->ref, stmt, ref->is_read);
4534 gcc_assert (dr != NULL);
4535 datarefs->safe_push (dr);
4536 }
4537 references.release ();
4538 return ret;
4539 }
4540
4541 /* Stores the data references in STMT to DATAREFS. If there is an
4542 unanalyzable reference, returns false, otherwise returns true.
4543 NEST is the outermost loop of the loop nest in which the references
4544 should be instantiated, LOOP is the loop in which the references
4545 should be analyzed. */
4546
4547 bool
4548 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4549 vec<data_reference_p> *datarefs)
4550 {
4551 unsigned i;
4552 auto_vec<data_ref_loc, 2> references;
4553 data_ref_loc *ref;
4554 bool ret = true;
4555 data_reference_p dr;
4556
4557 if (get_references_in_stmt (stmt, &references))
4558 return false;
4559
4560 FOR_EACH_VEC_ELT (references, i, ref)
4561 {
4562 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read);
4563 gcc_assert (dr != NULL);
4564 datarefs->safe_push (dr);
4565 }
4566
4567 references.release ();
4568 return ret;
4569 }
4570
4571 /* Search the data references in LOOP, and record the information into
4572 DATAREFS. Returns chrec_dont_know when failing to analyze a
4573 difficult case, returns NULL_TREE otherwise. */
4574
4575 tree
4576 find_data_references_in_bb (struct loop *loop, basic_block bb,
4577 vec<data_reference_p> *datarefs)
4578 {
4579 gimple_stmt_iterator bsi;
4580
4581 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4582 {
4583 gimple stmt = gsi_stmt (bsi);
4584
4585 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4586 {
4587 struct data_reference *res;
4588 res = XCNEW (struct data_reference);
4589 datarefs->safe_push (res);
4590
4591 return chrec_dont_know;
4592 }
4593 }
4594
4595 return NULL_TREE;
4596 }
4597
4598 /* Search the data references in LOOP, and record the information into
4599 DATAREFS. Returns chrec_dont_know when failing to analyze a
4600 difficult case, returns NULL_TREE otherwise.
4601
4602 TODO: This function should be made smarter so that it can handle address
4603 arithmetic as if they were array accesses, etc. */
4604
4605 tree
4606 find_data_references_in_loop (struct loop *loop,
4607 vec<data_reference_p> *datarefs)
4608 {
4609 basic_block bb, *bbs;
4610 unsigned int i;
4611
4612 bbs = get_loop_body_in_dom_order (loop);
4613
4614 for (i = 0; i < loop->num_nodes; i++)
4615 {
4616 bb = bbs[i];
4617
4618 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4619 {
4620 free (bbs);
4621 return chrec_dont_know;
4622 }
4623 }
4624 free (bbs);
4625
4626 return NULL_TREE;
4627 }
4628
4629 /* Recursive helper function. */
4630
4631 static bool
4632 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4633 {
4634 /* Inner loops of the nest should not contain siblings. Example:
4635 when there are two consecutive loops,
4636
4637 | loop_0
4638 | loop_1
4639 | A[{0, +, 1}_1]
4640 | endloop_1
4641 | loop_2
4642 | A[{0, +, 1}_2]
4643 | endloop_2
4644 | endloop_0
4645
4646 the dependence relation cannot be captured by the distance
4647 abstraction. */
4648 if (loop->next)
4649 return false;
4650
4651 loop_nest->safe_push (loop);
4652 if (loop->inner)
4653 return find_loop_nest_1 (loop->inner, loop_nest);
4654 return true;
4655 }
4656
4657 /* Return false when the LOOP is not well nested. Otherwise return
4658 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4659 contain the loops from the outermost to the innermost, as they will
4660 appear in the classic distance vector. */
4661
4662 bool
4663 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4664 {
4665 loop_nest->safe_push (loop);
4666 if (loop->inner)
4667 return find_loop_nest_1 (loop->inner, loop_nest);
4668 return true;
4669 }
4670
4671 /* Returns true when the data dependences have been computed, false otherwise.
4672 Given a loop nest LOOP, the following vectors are returned:
4673 DATAREFS is initialized to all the array elements contained in this loop,
4674 DEPENDENCE_RELATIONS contains the relations between the data references.
4675 Compute read-read and self relations if
4676 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4677
4678 bool
4679 compute_data_dependences_for_loop (struct loop *loop,
4680 bool compute_self_and_read_read_dependences,
4681 vec<loop_p> *loop_nest,
4682 vec<data_reference_p> *datarefs,
4683 vec<ddr_p> *dependence_relations)
4684 {
4685 bool res = true;
4686
4687 memset (&dependence_stats, 0, sizeof (dependence_stats));
4688
4689 /* If the loop nest is not well formed, or one of the data references
4690 is not computable, give up without spending time to compute other
4691 dependences. */
4692 if (!loop
4693 || !find_loop_nest (loop, loop_nest)
4694 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4695 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4696 compute_self_and_read_read_dependences))
4697 res = false;
4698
4699 if (dump_file && (dump_flags & TDF_STATS))
4700 {
4701 fprintf (dump_file, "Dependence tester statistics:\n");
4702
4703 fprintf (dump_file, "Number of dependence tests: %d\n",
4704 dependence_stats.num_dependence_tests);
4705 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4706 dependence_stats.num_dependence_dependent);
4707 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4708 dependence_stats.num_dependence_independent);
4709 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4710 dependence_stats.num_dependence_undetermined);
4711
4712 fprintf (dump_file, "Number of subscript tests: %d\n",
4713 dependence_stats.num_subscript_tests);
4714 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4715 dependence_stats.num_subscript_undetermined);
4716 fprintf (dump_file, "Number of same subscript function: %d\n",
4717 dependence_stats.num_same_subscript_function);
4718
4719 fprintf (dump_file, "Number of ziv tests: %d\n",
4720 dependence_stats.num_ziv);
4721 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4722 dependence_stats.num_ziv_dependent);
4723 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4724 dependence_stats.num_ziv_independent);
4725 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4726 dependence_stats.num_ziv_unimplemented);
4727
4728 fprintf (dump_file, "Number of siv tests: %d\n",
4729 dependence_stats.num_siv);
4730 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4731 dependence_stats.num_siv_dependent);
4732 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4733 dependence_stats.num_siv_independent);
4734 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4735 dependence_stats.num_siv_unimplemented);
4736
4737 fprintf (dump_file, "Number of miv tests: %d\n",
4738 dependence_stats.num_miv);
4739 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4740 dependence_stats.num_miv_dependent);
4741 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4742 dependence_stats.num_miv_independent);
4743 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4744 dependence_stats.num_miv_unimplemented);
4745 }
4746
4747 return res;
4748 }
4749
4750 /* Returns true when the data dependences for the basic block BB have been
4751 computed, false otherwise.
4752 DATAREFS is initialized to all the array elements contained in this basic
4753 block, DEPENDENCE_RELATIONS contains the relations between the data
4754 references. Compute read-read and self relations if
4755 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4756 bool
4757 compute_data_dependences_for_bb (basic_block bb,
4758 bool compute_self_and_read_read_dependences,
4759 vec<data_reference_p> *datarefs,
4760 vec<ddr_p> *dependence_relations)
4761 {
4762 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4763 return false;
4764
4765 return compute_all_dependences (*datarefs, dependence_relations, vNULL,
4766 compute_self_and_read_read_dependences);
4767 }
4768
4769 /* Entry point (for testing only). Analyze all the data references
4770 and the dependence relations in LOOP.
4771
4772 The data references are computed first.
4773
4774 A relation on these nodes is represented by a complete graph. Some
4775 of the relations could be of no interest, thus the relations can be
4776 computed on demand.
4777
4778 In the following function we compute all the relations. This is
4779 just a first implementation that is here for:
4780 - for showing how to ask for the dependence relations,
4781 - for the debugging the whole dependence graph,
4782 - for the dejagnu testcases and maintenance.
4783
4784 It is possible to ask only for a part of the graph, avoiding to
4785 compute the whole dependence graph. The computed dependences are
4786 stored in a knowledge base (KB) such that later queries don't
4787 recompute the same information. The implementation of this KB is
4788 transparent to the optimizer, and thus the KB can be changed with a
4789 more efficient implementation, or the KB could be disabled. */
4790 static void
4791 analyze_all_data_dependences (struct loop *loop)
4792 {
4793 unsigned int i;
4794 int nb_data_refs = 10;
4795 vec<data_reference_p> datarefs;
4796 datarefs.create (nb_data_refs);
4797 vec<ddr_p> dependence_relations;
4798 dependence_relations.create (nb_data_refs * nb_data_refs);
4799 vec<loop_p> loop_nest;
4800 loop_nest.create (3);
4801
4802 /* Compute DDs on the whole function. */
4803 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4804 &dependence_relations);
4805
4806 if (dump_file)
4807 {
4808 dump_data_dependence_relations (dump_file, dependence_relations);
4809 fprintf (dump_file, "\n\n");
4810
4811 if (dump_flags & TDF_DETAILS)
4812 dump_dist_dir_vectors (dump_file, dependence_relations);
4813
4814 if (dump_flags & TDF_STATS)
4815 {
4816 unsigned nb_top_relations = 0;
4817 unsigned nb_bot_relations = 0;
4818 unsigned nb_chrec_relations = 0;
4819 struct data_dependence_relation *ddr;
4820
4821 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4822 {
4823 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4824 nb_top_relations++;
4825
4826 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4827 nb_bot_relations++;
4828
4829 else
4830 nb_chrec_relations++;
4831 }
4832
4833 gather_stats_on_scev_database ();
4834 }
4835 }
4836
4837 loop_nest.release ();
4838 free_dependence_relations (dependence_relations);
4839 free_data_refs (datarefs);
4840 }
4841
4842 /* Computes all the data dependences and check that the results of
4843 several analyzers are the same. */
4844
4845 void
4846 tree_check_data_deps (void)
4847 {
4848 struct loop *loop_nest;
4849
4850 FOR_EACH_LOOP (loop_nest, 0)
4851 analyze_all_data_dependences (loop_nest);
4852 }
4853
4854 /* Free the memory used by a data dependence relation DDR. */
4855
4856 void
4857 free_dependence_relation (struct data_dependence_relation *ddr)
4858 {
4859 if (ddr == NULL)
4860 return;
4861
4862 if (DDR_SUBSCRIPTS (ddr).exists ())
4863 free_subscripts (DDR_SUBSCRIPTS (ddr));
4864 DDR_DIST_VECTS (ddr).release ();
4865 DDR_DIR_VECTS (ddr).release ();
4866
4867 free (ddr);
4868 }
4869
4870 /* Free the memory used by the data dependence relations from
4871 DEPENDENCE_RELATIONS. */
4872
4873 void
4874 free_dependence_relations (vec<ddr_p> dependence_relations)
4875 {
4876 unsigned int i;
4877 struct data_dependence_relation *ddr;
4878
4879 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4880 if (ddr)
4881 free_dependence_relation (ddr);
4882
4883 dependence_relations.release ();
4884 }
4885
4886 /* Free the memory used by the data references from DATAREFS. */
4887
4888 void
4889 free_data_refs (vec<data_reference_p> datarefs)
4890 {
4891 unsigned int i;
4892 struct data_reference *dr;
4893
4894 FOR_EACH_VEC_ELT (datarefs, i, dr)
4895 free_data_ref (dr);
4896 datarefs.release ();
4897 }