genattrtab.c (write_header): Include hash-set.h...
[gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
24
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
30
31 The goals of this analysis are:
32
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
36
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
39
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
45
46 - to define a knowledge base for storing the data dependence
47 information,
48
49 - to define an interface to access this data.
50
51
52 Definitions:
53
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
58
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
63
64 References:
65
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
69
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
72
73
74 */
75
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "hash-set.h"
80 #include "machmode.h"
81 #include "vec.h"
82 #include "double-int.h"
83 #include "input.h"
84 #include "alias.h"
85 #include "symtab.h"
86 #include "options.h"
87 #include "wide-int.h"
88 #include "inchash.h"
89 #include "tree.h"
90 #include "fold-const.h"
91 #include "expr.h"
92 #include "gimple-pretty-print.h"
93 #include "predict.h"
94 #include "tm.h"
95 #include "hard-reg-set.h"
96 #include "input.h"
97 #include "function.h"
98 #include "dominance.h"
99 #include "cfg.h"
100 #include "basic-block.h"
101 #include "tree-ssa-alias.h"
102 #include "internal-fn.h"
103 #include "gimple-expr.h"
104 #include "is-a.h"
105 #include "gimple.h"
106 #include "gimple-iterator.h"
107 #include "tree-ssa-loop-niter.h"
108 #include "tree-ssa-loop.h"
109 #include "tree-ssa.h"
110 #include "cfgloop.h"
111 #include "tree-data-ref.h"
112 #include "tree-scalar-evolution.h"
113 #include "dumpfile.h"
114 #include "langhooks.h"
115 #include "tree-affine.h"
116 #include "params.h"
117
118 static struct datadep_stats
119 {
120 int num_dependence_tests;
121 int num_dependence_dependent;
122 int num_dependence_independent;
123 int num_dependence_undetermined;
124
125 int num_subscript_tests;
126 int num_subscript_undetermined;
127 int num_same_subscript_function;
128
129 int num_ziv;
130 int num_ziv_independent;
131 int num_ziv_dependent;
132 int num_ziv_unimplemented;
133
134 int num_siv;
135 int num_siv_independent;
136 int num_siv_dependent;
137 int num_siv_unimplemented;
138
139 int num_miv;
140 int num_miv_independent;
141 int num_miv_dependent;
142 int num_miv_unimplemented;
143 } dependence_stats;
144
145 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
146 struct data_reference *,
147 struct data_reference *,
148 struct loop *);
149 /* Returns true iff A divides B. */
150
151 static inline bool
152 tree_fold_divides_p (const_tree a, const_tree b)
153 {
154 gcc_assert (TREE_CODE (a) == INTEGER_CST);
155 gcc_assert (TREE_CODE (b) == INTEGER_CST);
156 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
157 }
158
159 /* Returns true iff A divides B. */
160
161 static inline bool
162 int_divides_p (int a, int b)
163 {
164 return ((b % a) == 0);
165 }
166
167 \f
168
169 /* Dump into FILE all the data references from DATAREFS. */
170
171 static void
172 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
173 {
174 unsigned int i;
175 struct data_reference *dr;
176
177 FOR_EACH_VEC_ELT (datarefs, i, dr)
178 dump_data_reference (file, dr);
179 }
180
181 /* Unified dump into FILE all the data references from DATAREFS. */
182
183 DEBUG_FUNCTION void
184 debug (vec<data_reference_p> &ref)
185 {
186 dump_data_references (stderr, ref);
187 }
188
189 DEBUG_FUNCTION void
190 debug (vec<data_reference_p> *ptr)
191 {
192 if (ptr)
193 debug (*ptr);
194 else
195 fprintf (stderr, "<nil>\n");
196 }
197
198
199 /* Dump into STDERR all the data references from DATAREFS. */
200
201 DEBUG_FUNCTION void
202 debug_data_references (vec<data_reference_p> datarefs)
203 {
204 dump_data_references (stderr, datarefs);
205 }
206
207 /* Print to STDERR the data_reference DR. */
208
209 DEBUG_FUNCTION void
210 debug_data_reference (struct data_reference *dr)
211 {
212 dump_data_reference (stderr, dr);
213 }
214
215 /* Dump function for a DATA_REFERENCE structure. */
216
217 void
218 dump_data_reference (FILE *outf,
219 struct data_reference *dr)
220 {
221 unsigned int i;
222
223 fprintf (outf, "#(Data Ref: \n");
224 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
225 fprintf (outf, "# stmt: ");
226 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
227 fprintf (outf, "# ref: ");
228 print_generic_stmt (outf, DR_REF (dr), 0);
229 fprintf (outf, "# base_object: ");
230 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
231
232 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
233 {
234 fprintf (outf, "# Access function %d: ", i);
235 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
236 }
237 fprintf (outf, "#)\n");
238 }
239
240 /* Unified dump function for a DATA_REFERENCE structure. */
241
242 DEBUG_FUNCTION void
243 debug (data_reference &ref)
244 {
245 dump_data_reference (stderr, &ref);
246 }
247
248 DEBUG_FUNCTION void
249 debug (data_reference *ptr)
250 {
251 if (ptr)
252 debug (*ptr);
253 else
254 fprintf (stderr, "<nil>\n");
255 }
256
257
258 /* Dumps the affine function described by FN to the file OUTF. */
259
260 static void
261 dump_affine_function (FILE *outf, affine_fn fn)
262 {
263 unsigned i;
264 tree coef;
265
266 print_generic_expr (outf, fn[0], TDF_SLIM);
267 for (i = 1; fn.iterate (i, &coef); i++)
268 {
269 fprintf (outf, " + ");
270 print_generic_expr (outf, coef, TDF_SLIM);
271 fprintf (outf, " * x_%u", i);
272 }
273 }
274
275 /* Dumps the conflict function CF to the file OUTF. */
276
277 static void
278 dump_conflict_function (FILE *outf, conflict_function *cf)
279 {
280 unsigned i;
281
282 if (cf->n == NO_DEPENDENCE)
283 fprintf (outf, "no dependence");
284 else if (cf->n == NOT_KNOWN)
285 fprintf (outf, "not known");
286 else
287 {
288 for (i = 0; i < cf->n; i++)
289 {
290 if (i != 0)
291 fprintf (outf, " ");
292 fprintf (outf, "[");
293 dump_affine_function (outf, cf->fns[i]);
294 fprintf (outf, "]");
295 }
296 }
297 }
298
299 /* Dump function for a SUBSCRIPT structure. */
300
301 static void
302 dump_subscript (FILE *outf, struct subscript *subscript)
303 {
304 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
305
306 fprintf (outf, "\n (subscript \n");
307 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
308 dump_conflict_function (outf, cf);
309 if (CF_NONTRIVIAL_P (cf))
310 {
311 tree last_iteration = SUB_LAST_CONFLICT (subscript);
312 fprintf (outf, "\n last_conflict: ");
313 print_generic_expr (outf, last_iteration, 0);
314 }
315
316 cf = SUB_CONFLICTS_IN_B (subscript);
317 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
318 dump_conflict_function (outf, cf);
319 if (CF_NONTRIVIAL_P (cf))
320 {
321 tree last_iteration = SUB_LAST_CONFLICT (subscript);
322 fprintf (outf, "\n last_conflict: ");
323 print_generic_expr (outf, last_iteration, 0);
324 }
325
326 fprintf (outf, "\n (Subscript distance: ");
327 print_generic_expr (outf, SUB_DISTANCE (subscript), 0);
328 fprintf (outf, " ))\n");
329 }
330
331 /* Print the classic direction vector DIRV to OUTF. */
332
333 static void
334 print_direction_vector (FILE *outf,
335 lambda_vector dirv,
336 int length)
337 {
338 int eq;
339
340 for (eq = 0; eq < length; eq++)
341 {
342 enum data_dependence_direction dir = ((enum data_dependence_direction)
343 dirv[eq]);
344
345 switch (dir)
346 {
347 case dir_positive:
348 fprintf (outf, " +");
349 break;
350 case dir_negative:
351 fprintf (outf, " -");
352 break;
353 case dir_equal:
354 fprintf (outf, " =");
355 break;
356 case dir_positive_or_equal:
357 fprintf (outf, " +=");
358 break;
359 case dir_positive_or_negative:
360 fprintf (outf, " +-");
361 break;
362 case dir_negative_or_equal:
363 fprintf (outf, " -=");
364 break;
365 case dir_star:
366 fprintf (outf, " *");
367 break;
368 default:
369 fprintf (outf, "indep");
370 break;
371 }
372 }
373 fprintf (outf, "\n");
374 }
375
376 /* Print a vector of direction vectors. */
377
378 static void
379 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
380 int length)
381 {
382 unsigned j;
383 lambda_vector v;
384
385 FOR_EACH_VEC_ELT (dir_vects, j, v)
386 print_direction_vector (outf, v, length);
387 }
388
389 /* Print out a vector VEC of length N to OUTFILE. */
390
391 static inline void
392 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
393 {
394 int i;
395
396 for (i = 0; i < n; i++)
397 fprintf (outfile, "%3d ", vector[i]);
398 fprintf (outfile, "\n");
399 }
400
401 /* Print a vector of distance vectors. */
402
403 static void
404 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
405 int length)
406 {
407 unsigned j;
408 lambda_vector v;
409
410 FOR_EACH_VEC_ELT (dist_vects, j, v)
411 print_lambda_vector (outf, v, length);
412 }
413
414 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
415
416 static void
417 dump_data_dependence_relation (FILE *outf,
418 struct data_dependence_relation *ddr)
419 {
420 struct data_reference *dra, *drb;
421
422 fprintf (outf, "(Data Dep: \n");
423
424 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
425 {
426 if (ddr)
427 {
428 dra = DDR_A (ddr);
429 drb = DDR_B (ddr);
430 if (dra)
431 dump_data_reference (outf, dra);
432 else
433 fprintf (outf, " (nil)\n");
434 if (drb)
435 dump_data_reference (outf, drb);
436 else
437 fprintf (outf, " (nil)\n");
438 }
439 fprintf (outf, " (don't know)\n)\n");
440 return;
441 }
442
443 dra = DDR_A (ddr);
444 drb = DDR_B (ddr);
445 dump_data_reference (outf, dra);
446 dump_data_reference (outf, drb);
447
448 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
449 fprintf (outf, " (no dependence)\n");
450
451 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
452 {
453 unsigned int i;
454 struct loop *loopi;
455
456 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
457 {
458 fprintf (outf, " access_fn_A: ");
459 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
460 fprintf (outf, " access_fn_B: ");
461 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
462 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
463 }
464
465 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
466 fprintf (outf, " loop nest: (");
467 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
468 fprintf (outf, "%d ", loopi->num);
469 fprintf (outf, ")\n");
470
471 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
472 {
473 fprintf (outf, " distance_vector: ");
474 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
475 DDR_NB_LOOPS (ddr));
476 }
477
478 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
479 {
480 fprintf (outf, " direction_vector: ");
481 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
482 DDR_NB_LOOPS (ddr));
483 }
484 }
485
486 fprintf (outf, ")\n");
487 }
488
489 /* Debug version. */
490
491 DEBUG_FUNCTION void
492 debug_data_dependence_relation (struct data_dependence_relation *ddr)
493 {
494 dump_data_dependence_relation (stderr, ddr);
495 }
496
497 /* Dump into FILE all the dependence relations from DDRS. */
498
499 void
500 dump_data_dependence_relations (FILE *file,
501 vec<ddr_p> ddrs)
502 {
503 unsigned int i;
504 struct data_dependence_relation *ddr;
505
506 FOR_EACH_VEC_ELT (ddrs, i, ddr)
507 dump_data_dependence_relation (file, ddr);
508 }
509
510 DEBUG_FUNCTION void
511 debug (vec<ddr_p> &ref)
512 {
513 dump_data_dependence_relations (stderr, ref);
514 }
515
516 DEBUG_FUNCTION void
517 debug (vec<ddr_p> *ptr)
518 {
519 if (ptr)
520 debug (*ptr);
521 else
522 fprintf (stderr, "<nil>\n");
523 }
524
525
526 /* Dump to STDERR all the dependence relations from DDRS. */
527
528 DEBUG_FUNCTION void
529 debug_data_dependence_relations (vec<ddr_p> ddrs)
530 {
531 dump_data_dependence_relations (stderr, ddrs);
532 }
533
534 /* Dumps the distance and direction vectors in FILE. DDRS contains
535 the dependence relations, and VECT_SIZE is the size of the
536 dependence vectors, or in other words the number of loops in the
537 considered nest. */
538
539 static void
540 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
541 {
542 unsigned int i, j;
543 struct data_dependence_relation *ddr;
544 lambda_vector v;
545
546 FOR_EACH_VEC_ELT (ddrs, i, ddr)
547 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
548 {
549 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
550 {
551 fprintf (file, "DISTANCE_V (");
552 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
553 fprintf (file, ")\n");
554 }
555
556 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
557 {
558 fprintf (file, "DIRECTION_V (");
559 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
560 fprintf (file, ")\n");
561 }
562 }
563
564 fprintf (file, "\n\n");
565 }
566
567 /* Dumps the data dependence relations DDRS in FILE. */
568
569 static void
570 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
571 {
572 unsigned int i;
573 struct data_dependence_relation *ddr;
574
575 FOR_EACH_VEC_ELT (ddrs, i, ddr)
576 dump_data_dependence_relation (file, ddr);
577
578 fprintf (file, "\n\n");
579 }
580
581 DEBUG_FUNCTION void
582 debug_ddrs (vec<ddr_p> ddrs)
583 {
584 dump_ddrs (stderr, ddrs);
585 }
586
587 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
588 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
589 constant of type ssizetype, and returns true. If we cannot do this
590 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
591 is returned. */
592
593 static bool
594 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
595 tree *var, tree *off)
596 {
597 tree var0, var1;
598 tree off0, off1;
599 enum tree_code ocode = code;
600
601 *var = NULL_TREE;
602 *off = NULL_TREE;
603
604 switch (code)
605 {
606 case INTEGER_CST:
607 *var = build_int_cst (type, 0);
608 *off = fold_convert (ssizetype, op0);
609 return true;
610
611 case POINTER_PLUS_EXPR:
612 ocode = PLUS_EXPR;
613 /* FALLTHROUGH */
614 case PLUS_EXPR:
615 case MINUS_EXPR:
616 split_constant_offset (op0, &var0, &off0);
617 split_constant_offset (op1, &var1, &off1);
618 *var = fold_build2 (code, type, var0, var1);
619 *off = size_binop (ocode, off0, off1);
620 return true;
621
622 case MULT_EXPR:
623 if (TREE_CODE (op1) != INTEGER_CST)
624 return false;
625
626 split_constant_offset (op0, &var0, &off0);
627 *var = fold_build2 (MULT_EXPR, type, var0, op1);
628 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
629 return true;
630
631 case ADDR_EXPR:
632 {
633 tree base, poffset;
634 HOST_WIDE_INT pbitsize, pbitpos;
635 machine_mode pmode;
636 int punsignedp, pvolatilep;
637
638 op0 = TREE_OPERAND (op0, 0);
639 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
640 &pmode, &punsignedp, &pvolatilep, false);
641
642 if (pbitpos % BITS_PER_UNIT != 0)
643 return false;
644 base = build_fold_addr_expr (base);
645 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
646
647 if (poffset)
648 {
649 split_constant_offset (poffset, &poffset, &off1);
650 off0 = size_binop (PLUS_EXPR, off0, off1);
651 if (POINTER_TYPE_P (TREE_TYPE (base)))
652 base = fold_build_pointer_plus (base, poffset);
653 else
654 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
655 fold_convert (TREE_TYPE (base), poffset));
656 }
657
658 var0 = fold_convert (type, base);
659
660 /* If variable length types are involved, punt, otherwise casts
661 might be converted into ARRAY_REFs in gimplify_conversion.
662 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
663 possibly no longer appears in current GIMPLE, might resurface.
664 This perhaps could run
665 if (CONVERT_EXPR_P (var0))
666 {
667 gimplify_conversion (&var0);
668 // Attempt to fill in any within var0 found ARRAY_REF's
669 // element size from corresponding op embedded ARRAY_REF,
670 // if unsuccessful, just punt.
671 } */
672 while (POINTER_TYPE_P (type))
673 type = TREE_TYPE (type);
674 if (int_size_in_bytes (type) < 0)
675 return false;
676
677 *var = var0;
678 *off = off0;
679 return true;
680 }
681
682 case SSA_NAME:
683 {
684 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
685 return false;
686
687 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
688 enum tree_code subcode;
689
690 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
691 return false;
692
693 var0 = gimple_assign_rhs1 (def_stmt);
694 subcode = gimple_assign_rhs_code (def_stmt);
695 var1 = gimple_assign_rhs2 (def_stmt);
696
697 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
698 }
699 CASE_CONVERT:
700 {
701 /* We must not introduce undefined overflow, and we must not change the value.
702 Hence we're okay if the inner type doesn't overflow to start with
703 (pointer or signed), the outer type also is an integer or pointer
704 and the outer precision is at least as large as the inner. */
705 tree itype = TREE_TYPE (op0);
706 if ((POINTER_TYPE_P (itype)
707 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
708 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
709 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
710 {
711 split_constant_offset (op0, &var0, off);
712 *var = fold_convert (type, var0);
713 return true;
714 }
715 return false;
716 }
717
718 default:
719 return false;
720 }
721 }
722
723 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
724 will be ssizetype. */
725
726 void
727 split_constant_offset (tree exp, tree *var, tree *off)
728 {
729 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
730 enum tree_code code;
731
732 *var = exp;
733 *off = ssize_int (0);
734 STRIP_NOPS (exp);
735
736 if (tree_is_chrec (exp)
737 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
738 return;
739
740 otype = TREE_TYPE (exp);
741 code = TREE_CODE (exp);
742 extract_ops_from_tree (exp, &code, &op0, &op1);
743 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
744 {
745 *var = fold_convert (type, e);
746 *off = o;
747 }
748 }
749
750 /* Returns the address ADDR of an object in a canonical shape (without nop
751 casts, and with type of pointer to the object). */
752
753 static tree
754 canonicalize_base_object_address (tree addr)
755 {
756 tree orig = addr;
757
758 STRIP_NOPS (addr);
759
760 /* The base address may be obtained by casting from integer, in that case
761 keep the cast. */
762 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
763 return orig;
764
765 if (TREE_CODE (addr) != ADDR_EXPR)
766 return addr;
767
768 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
769 }
770
771 /* Analyzes the behavior of the memory reference DR in the innermost loop or
772 basic block that contains it. Returns true if analysis succeed or false
773 otherwise. */
774
775 bool
776 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
777 {
778 gimple stmt = DR_STMT (dr);
779 struct loop *loop = loop_containing_stmt (stmt);
780 tree ref = DR_REF (dr);
781 HOST_WIDE_INT pbitsize, pbitpos;
782 tree base, poffset;
783 machine_mode pmode;
784 int punsignedp, pvolatilep;
785 affine_iv base_iv, offset_iv;
786 tree init, dinit, step;
787 bool in_loop = (loop && loop->num);
788
789 if (dump_file && (dump_flags & TDF_DETAILS))
790 fprintf (dump_file, "analyze_innermost: ");
791
792 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
793 &pmode, &punsignedp, &pvolatilep, false);
794 gcc_assert (base != NULL_TREE);
795
796 if (pbitpos % BITS_PER_UNIT != 0)
797 {
798 if (dump_file && (dump_flags & TDF_DETAILS))
799 fprintf (dump_file, "failed: bit offset alignment.\n");
800 return false;
801 }
802
803 if (TREE_CODE (base) == MEM_REF)
804 {
805 if (!integer_zerop (TREE_OPERAND (base, 1)))
806 {
807 offset_int moff = mem_ref_offset (base);
808 tree mofft = wide_int_to_tree (sizetype, moff);
809 if (!poffset)
810 poffset = mofft;
811 else
812 poffset = size_binop (PLUS_EXPR, poffset, mofft);
813 }
814 base = TREE_OPERAND (base, 0);
815 }
816 else
817 base = build_fold_addr_expr (base);
818
819 if (in_loop)
820 {
821 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
822 nest ? true : false))
823 {
824 if (nest)
825 {
826 if (dump_file && (dump_flags & TDF_DETAILS))
827 fprintf (dump_file, "failed: evolution of base is not"
828 " affine.\n");
829 return false;
830 }
831 else
832 {
833 base_iv.base = base;
834 base_iv.step = ssize_int (0);
835 base_iv.no_overflow = true;
836 }
837 }
838 }
839 else
840 {
841 base_iv.base = base;
842 base_iv.step = ssize_int (0);
843 base_iv.no_overflow = true;
844 }
845
846 if (!poffset)
847 {
848 offset_iv.base = ssize_int (0);
849 offset_iv.step = ssize_int (0);
850 }
851 else
852 {
853 if (!in_loop)
854 {
855 offset_iv.base = poffset;
856 offset_iv.step = ssize_int (0);
857 }
858 else if (!simple_iv (loop, loop_containing_stmt (stmt),
859 poffset, &offset_iv,
860 nest ? true : false))
861 {
862 if (nest)
863 {
864 if (dump_file && (dump_flags & TDF_DETAILS))
865 fprintf (dump_file, "failed: evolution of offset is not"
866 " affine.\n");
867 return false;
868 }
869 else
870 {
871 offset_iv.base = poffset;
872 offset_iv.step = ssize_int (0);
873 }
874 }
875 }
876
877 init = ssize_int (pbitpos / BITS_PER_UNIT);
878 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
879 init = size_binop (PLUS_EXPR, init, dinit);
880 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
881 init = size_binop (PLUS_EXPR, init, dinit);
882
883 step = size_binop (PLUS_EXPR,
884 fold_convert (ssizetype, base_iv.step),
885 fold_convert (ssizetype, offset_iv.step));
886
887 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
888
889 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
890 DR_INIT (dr) = init;
891 DR_STEP (dr) = step;
892
893 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
894
895 if (dump_file && (dump_flags & TDF_DETAILS))
896 fprintf (dump_file, "success.\n");
897
898 return true;
899 }
900
901 /* Determines the base object and the list of indices of memory reference
902 DR, analyzed in LOOP and instantiated in loop nest NEST. */
903
904 static void
905 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
906 {
907 vec<tree> access_fns = vNULL;
908 tree ref, op;
909 tree base, off, access_fn;
910 basic_block before_loop;
911
912 /* If analyzing a basic-block there are no indices to analyze
913 and thus no access functions. */
914 if (!nest)
915 {
916 DR_BASE_OBJECT (dr) = DR_REF (dr);
917 DR_ACCESS_FNS (dr).create (0);
918 return;
919 }
920
921 ref = DR_REF (dr);
922 before_loop = block_before_loop (nest);
923
924 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
925 into a two element array with a constant index. The base is
926 then just the immediate underlying object. */
927 if (TREE_CODE (ref) == REALPART_EXPR)
928 {
929 ref = TREE_OPERAND (ref, 0);
930 access_fns.safe_push (integer_zero_node);
931 }
932 else if (TREE_CODE (ref) == IMAGPART_EXPR)
933 {
934 ref = TREE_OPERAND (ref, 0);
935 access_fns.safe_push (integer_one_node);
936 }
937
938 /* Analyze access functions of dimensions we know to be independent. */
939 while (handled_component_p (ref))
940 {
941 if (TREE_CODE (ref) == ARRAY_REF)
942 {
943 op = TREE_OPERAND (ref, 1);
944 access_fn = analyze_scalar_evolution (loop, op);
945 access_fn = instantiate_scev (before_loop, loop, access_fn);
946 access_fns.safe_push (access_fn);
947 }
948 else if (TREE_CODE (ref) == COMPONENT_REF
949 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
950 {
951 /* For COMPONENT_REFs of records (but not unions!) use the
952 FIELD_DECL offset as constant access function so we can
953 disambiguate a[i].f1 and a[i].f2. */
954 tree off = component_ref_field_offset (ref);
955 off = size_binop (PLUS_EXPR,
956 size_binop (MULT_EXPR,
957 fold_convert (bitsizetype, off),
958 bitsize_int (BITS_PER_UNIT)),
959 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
960 access_fns.safe_push (off);
961 }
962 else
963 /* If we have an unhandled component we could not translate
964 to an access function stop analyzing. We have determined
965 our base object in this case. */
966 break;
967
968 ref = TREE_OPERAND (ref, 0);
969 }
970
971 /* If the address operand of a MEM_REF base has an evolution in the
972 analyzed nest, add it as an additional independent access-function. */
973 if (TREE_CODE (ref) == MEM_REF)
974 {
975 op = TREE_OPERAND (ref, 0);
976 access_fn = analyze_scalar_evolution (loop, op);
977 access_fn = instantiate_scev (before_loop, loop, access_fn);
978 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
979 {
980 tree orig_type;
981 tree memoff = TREE_OPERAND (ref, 1);
982 base = initial_condition (access_fn);
983 orig_type = TREE_TYPE (base);
984 STRIP_USELESS_TYPE_CONVERSION (base);
985 split_constant_offset (base, &base, &off);
986 STRIP_USELESS_TYPE_CONVERSION (base);
987 /* Fold the MEM_REF offset into the evolutions initial
988 value to make more bases comparable. */
989 if (!integer_zerop (memoff))
990 {
991 off = size_binop (PLUS_EXPR, off,
992 fold_convert (ssizetype, memoff));
993 memoff = build_int_cst (TREE_TYPE (memoff), 0);
994 }
995 access_fn = chrec_replace_initial_condition
996 (access_fn, fold_convert (orig_type, off));
997 /* ??? This is still not a suitable base object for
998 dr_may_alias_p - the base object needs to be an
999 access that covers the object as whole. With
1000 an evolution in the pointer this cannot be
1001 guaranteed.
1002 As a band-aid, mark the access so we can special-case
1003 it in dr_may_alias_p. */
1004 tree old = ref;
1005 ref = fold_build2_loc (EXPR_LOCATION (ref),
1006 MEM_REF, TREE_TYPE (ref),
1007 base, memoff);
1008 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1009 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1010 access_fns.safe_push (access_fn);
1011 }
1012 }
1013 else if (DECL_P (ref))
1014 {
1015 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1016 ref = build2 (MEM_REF, TREE_TYPE (ref),
1017 build_fold_addr_expr (ref),
1018 build_int_cst (reference_alias_ptr_type (ref), 0));
1019 }
1020
1021 DR_BASE_OBJECT (dr) = ref;
1022 DR_ACCESS_FNS (dr) = access_fns;
1023 }
1024
1025 /* Extracts the alias analysis information from the memory reference DR. */
1026
1027 static void
1028 dr_analyze_alias (struct data_reference *dr)
1029 {
1030 tree ref = DR_REF (dr);
1031 tree base = get_base_address (ref), addr;
1032
1033 if (INDIRECT_REF_P (base)
1034 || TREE_CODE (base) == MEM_REF)
1035 {
1036 addr = TREE_OPERAND (base, 0);
1037 if (TREE_CODE (addr) == SSA_NAME)
1038 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1039 }
1040 }
1041
1042 /* Frees data reference DR. */
1043
1044 void
1045 free_data_ref (data_reference_p dr)
1046 {
1047 DR_ACCESS_FNS (dr).release ();
1048 free (dr);
1049 }
1050
1051 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1052 is read if IS_READ is true, write otherwise. Returns the
1053 data_reference description of MEMREF. NEST is the outermost loop
1054 in which the reference should be instantiated, LOOP is the loop in
1055 which the data reference should be analyzed. */
1056
1057 struct data_reference *
1058 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
1059 bool is_read)
1060 {
1061 struct data_reference *dr;
1062
1063 if (dump_file && (dump_flags & TDF_DETAILS))
1064 {
1065 fprintf (dump_file, "Creating dr for ");
1066 print_generic_expr (dump_file, memref, TDF_SLIM);
1067 fprintf (dump_file, "\n");
1068 }
1069
1070 dr = XCNEW (struct data_reference);
1071 DR_STMT (dr) = stmt;
1072 DR_REF (dr) = memref;
1073 DR_IS_READ (dr) = is_read;
1074
1075 dr_analyze_innermost (dr, nest);
1076 dr_analyze_indices (dr, nest, loop);
1077 dr_analyze_alias (dr);
1078
1079 if (dump_file && (dump_flags & TDF_DETAILS))
1080 {
1081 unsigned i;
1082 fprintf (dump_file, "\tbase_address: ");
1083 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1084 fprintf (dump_file, "\n\toffset from base address: ");
1085 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1086 fprintf (dump_file, "\n\tconstant offset from base address: ");
1087 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1088 fprintf (dump_file, "\n\tstep: ");
1089 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1090 fprintf (dump_file, "\n\taligned to: ");
1091 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1092 fprintf (dump_file, "\n\tbase_object: ");
1093 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1094 fprintf (dump_file, "\n");
1095 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1096 {
1097 fprintf (dump_file, "\tAccess function %d: ", i);
1098 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1099 }
1100 }
1101
1102 return dr;
1103 }
1104
1105 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1106 expressions. */
1107 static bool
1108 dr_equal_offsets_p1 (tree offset1, tree offset2)
1109 {
1110 bool res;
1111
1112 STRIP_NOPS (offset1);
1113 STRIP_NOPS (offset2);
1114
1115 if (offset1 == offset2)
1116 return true;
1117
1118 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1119 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1120 return false;
1121
1122 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1123 TREE_OPERAND (offset2, 0));
1124
1125 if (!res || !BINARY_CLASS_P (offset1))
1126 return res;
1127
1128 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1129 TREE_OPERAND (offset2, 1));
1130
1131 return res;
1132 }
1133
1134 /* Check if DRA and DRB have equal offsets. */
1135 bool
1136 dr_equal_offsets_p (struct data_reference *dra,
1137 struct data_reference *drb)
1138 {
1139 tree offset1, offset2;
1140
1141 offset1 = DR_OFFSET (dra);
1142 offset2 = DR_OFFSET (drb);
1143
1144 return dr_equal_offsets_p1 (offset1, offset2);
1145 }
1146
1147 /* Returns true if FNA == FNB. */
1148
1149 static bool
1150 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1151 {
1152 unsigned i, n = fna.length ();
1153
1154 if (n != fnb.length ())
1155 return false;
1156
1157 for (i = 0; i < n; i++)
1158 if (!operand_equal_p (fna[i], fnb[i], 0))
1159 return false;
1160
1161 return true;
1162 }
1163
1164 /* If all the functions in CF are the same, returns one of them,
1165 otherwise returns NULL. */
1166
1167 static affine_fn
1168 common_affine_function (conflict_function *cf)
1169 {
1170 unsigned i;
1171 affine_fn comm;
1172
1173 if (!CF_NONTRIVIAL_P (cf))
1174 return affine_fn ();
1175
1176 comm = cf->fns[0];
1177
1178 for (i = 1; i < cf->n; i++)
1179 if (!affine_function_equal_p (comm, cf->fns[i]))
1180 return affine_fn ();
1181
1182 return comm;
1183 }
1184
1185 /* Returns the base of the affine function FN. */
1186
1187 static tree
1188 affine_function_base (affine_fn fn)
1189 {
1190 return fn[0];
1191 }
1192
1193 /* Returns true if FN is a constant. */
1194
1195 static bool
1196 affine_function_constant_p (affine_fn fn)
1197 {
1198 unsigned i;
1199 tree coef;
1200
1201 for (i = 1; fn.iterate (i, &coef); i++)
1202 if (!integer_zerop (coef))
1203 return false;
1204
1205 return true;
1206 }
1207
1208 /* Returns true if FN is the zero constant function. */
1209
1210 static bool
1211 affine_function_zero_p (affine_fn fn)
1212 {
1213 return (integer_zerop (affine_function_base (fn))
1214 && affine_function_constant_p (fn));
1215 }
1216
1217 /* Returns a signed integer type with the largest precision from TA
1218 and TB. */
1219
1220 static tree
1221 signed_type_for_types (tree ta, tree tb)
1222 {
1223 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1224 return signed_type_for (ta);
1225 else
1226 return signed_type_for (tb);
1227 }
1228
1229 /* Applies operation OP on affine functions FNA and FNB, and returns the
1230 result. */
1231
1232 static affine_fn
1233 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1234 {
1235 unsigned i, n, m;
1236 affine_fn ret;
1237 tree coef;
1238
1239 if (fnb.length () > fna.length ())
1240 {
1241 n = fna.length ();
1242 m = fnb.length ();
1243 }
1244 else
1245 {
1246 n = fnb.length ();
1247 m = fna.length ();
1248 }
1249
1250 ret.create (m);
1251 for (i = 0; i < n; i++)
1252 {
1253 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1254 TREE_TYPE (fnb[i]));
1255 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1256 }
1257
1258 for (; fna.iterate (i, &coef); i++)
1259 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1260 coef, integer_zero_node));
1261 for (; fnb.iterate (i, &coef); i++)
1262 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1263 integer_zero_node, coef));
1264
1265 return ret;
1266 }
1267
1268 /* Returns the sum of affine functions FNA and FNB. */
1269
1270 static affine_fn
1271 affine_fn_plus (affine_fn fna, affine_fn fnb)
1272 {
1273 return affine_fn_op (PLUS_EXPR, fna, fnb);
1274 }
1275
1276 /* Returns the difference of affine functions FNA and FNB. */
1277
1278 static affine_fn
1279 affine_fn_minus (affine_fn fna, affine_fn fnb)
1280 {
1281 return affine_fn_op (MINUS_EXPR, fna, fnb);
1282 }
1283
1284 /* Frees affine function FN. */
1285
1286 static void
1287 affine_fn_free (affine_fn fn)
1288 {
1289 fn.release ();
1290 }
1291
1292 /* Determine for each subscript in the data dependence relation DDR
1293 the distance. */
1294
1295 static void
1296 compute_subscript_distance (struct data_dependence_relation *ddr)
1297 {
1298 conflict_function *cf_a, *cf_b;
1299 affine_fn fn_a, fn_b, diff;
1300
1301 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1302 {
1303 unsigned int i;
1304
1305 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1306 {
1307 struct subscript *subscript;
1308
1309 subscript = DDR_SUBSCRIPT (ddr, i);
1310 cf_a = SUB_CONFLICTS_IN_A (subscript);
1311 cf_b = SUB_CONFLICTS_IN_B (subscript);
1312
1313 fn_a = common_affine_function (cf_a);
1314 fn_b = common_affine_function (cf_b);
1315 if (!fn_a.exists () || !fn_b.exists ())
1316 {
1317 SUB_DISTANCE (subscript) = chrec_dont_know;
1318 return;
1319 }
1320 diff = affine_fn_minus (fn_a, fn_b);
1321
1322 if (affine_function_constant_p (diff))
1323 SUB_DISTANCE (subscript) = affine_function_base (diff);
1324 else
1325 SUB_DISTANCE (subscript) = chrec_dont_know;
1326
1327 affine_fn_free (diff);
1328 }
1329 }
1330 }
1331
1332 /* Returns the conflict function for "unknown". */
1333
1334 static conflict_function *
1335 conflict_fn_not_known (void)
1336 {
1337 conflict_function *fn = XCNEW (conflict_function);
1338 fn->n = NOT_KNOWN;
1339
1340 return fn;
1341 }
1342
1343 /* Returns the conflict function for "independent". */
1344
1345 static conflict_function *
1346 conflict_fn_no_dependence (void)
1347 {
1348 conflict_function *fn = XCNEW (conflict_function);
1349 fn->n = NO_DEPENDENCE;
1350
1351 return fn;
1352 }
1353
1354 /* Returns true if the address of OBJ is invariant in LOOP. */
1355
1356 static bool
1357 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1358 {
1359 while (handled_component_p (obj))
1360 {
1361 if (TREE_CODE (obj) == ARRAY_REF)
1362 {
1363 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1364 need to check the stride and the lower bound of the reference. */
1365 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1366 loop->num)
1367 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1368 loop->num))
1369 return false;
1370 }
1371 else if (TREE_CODE (obj) == COMPONENT_REF)
1372 {
1373 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1374 loop->num))
1375 return false;
1376 }
1377 obj = TREE_OPERAND (obj, 0);
1378 }
1379
1380 if (!INDIRECT_REF_P (obj)
1381 && TREE_CODE (obj) != MEM_REF)
1382 return true;
1383
1384 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1385 loop->num);
1386 }
1387
1388 /* Returns false if we can prove that data references A and B do not alias,
1389 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1390 considered. */
1391
1392 bool
1393 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1394 bool loop_nest)
1395 {
1396 tree addr_a = DR_BASE_OBJECT (a);
1397 tree addr_b = DR_BASE_OBJECT (b);
1398
1399 /* If we are not processing a loop nest but scalar code we
1400 do not need to care about possible cross-iteration dependences
1401 and thus can process the full original reference. Do so,
1402 similar to how loop invariant motion applies extra offset-based
1403 disambiguation. */
1404 if (!loop_nest)
1405 {
1406 aff_tree off1, off2;
1407 widest_int size1, size2;
1408 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1409 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1410 aff_combination_scale (&off1, -1);
1411 aff_combination_add (&off2, &off1);
1412 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1413 return false;
1414 }
1415
1416 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
1417 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
1418 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
1419 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
1420 return false;
1421
1422 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
1423 do not know the size of the base-object. So we cannot do any
1424 offset/overlap based analysis but have to rely on points-to
1425 information only. */
1426 if (TREE_CODE (addr_a) == MEM_REF
1427 && TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME)
1428 {
1429 /* For true dependences we can apply TBAA. */
1430 if (flag_strict_aliasing
1431 && DR_IS_WRITE (a) && DR_IS_READ (b)
1432 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1433 get_alias_set (DR_REF (b))))
1434 return false;
1435 if (TREE_CODE (addr_b) == MEM_REF)
1436 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1437 TREE_OPERAND (addr_b, 0));
1438 else
1439 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1440 build_fold_addr_expr (addr_b));
1441 }
1442 else if (TREE_CODE (addr_b) == MEM_REF
1443 && TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME)
1444 {
1445 /* For true dependences we can apply TBAA. */
1446 if (flag_strict_aliasing
1447 && DR_IS_WRITE (a) && DR_IS_READ (b)
1448 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1449 get_alias_set (DR_REF (b))))
1450 return false;
1451 if (TREE_CODE (addr_a) == MEM_REF)
1452 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1453 TREE_OPERAND (addr_b, 0));
1454 else
1455 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1456 TREE_OPERAND (addr_b, 0));
1457 }
1458
1459 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1460 that is being subsetted in the loop nest. */
1461 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1462 return refs_output_dependent_p (addr_a, addr_b);
1463 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1464 return refs_anti_dependent_p (addr_a, addr_b);
1465 return refs_may_alias_p (addr_a, addr_b);
1466 }
1467
1468 /* Initialize a data dependence relation between data accesses A and
1469 B. NB_LOOPS is the number of loops surrounding the references: the
1470 size of the classic distance/direction vectors. */
1471
1472 struct data_dependence_relation *
1473 initialize_data_dependence_relation (struct data_reference *a,
1474 struct data_reference *b,
1475 vec<loop_p> loop_nest)
1476 {
1477 struct data_dependence_relation *res;
1478 unsigned int i;
1479
1480 res = XNEW (struct data_dependence_relation);
1481 DDR_A (res) = a;
1482 DDR_B (res) = b;
1483 DDR_LOOP_NEST (res).create (0);
1484 DDR_REVERSED_P (res) = false;
1485 DDR_SUBSCRIPTS (res).create (0);
1486 DDR_DIR_VECTS (res).create (0);
1487 DDR_DIST_VECTS (res).create (0);
1488
1489 if (a == NULL || b == NULL)
1490 {
1491 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1492 return res;
1493 }
1494
1495 /* If the data references do not alias, then they are independent. */
1496 if (!dr_may_alias_p (a, b, loop_nest.exists ()))
1497 {
1498 DDR_ARE_DEPENDENT (res) = chrec_known;
1499 return res;
1500 }
1501
1502 /* The case where the references are exactly the same. */
1503 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1504 {
1505 if (loop_nest.exists ()
1506 && !object_address_invariant_in_loop_p (loop_nest[0],
1507 DR_BASE_OBJECT (a)))
1508 {
1509 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1510 return res;
1511 }
1512 DDR_AFFINE_P (res) = true;
1513 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1514 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1515 DDR_LOOP_NEST (res) = loop_nest;
1516 DDR_INNER_LOOP (res) = 0;
1517 DDR_SELF_REFERENCE (res) = true;
1518 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1519 {
1520 struct subscript *subscript;
1521
1522 subscript = XNEW (struct subscript);
1523 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1524 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1525 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1526 SUB_DISTANCE (subscript) = chrec_dont_know;
1527 DDR_SUBSCRIPTS (res).safe_push (subscript);
1528 }
1529 return res;
1530 }
1531
1532 /* If the references do not access the same object, we do not know
1533 whether they alias or not. */
1534 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1535 {
1536 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1537 return res;
1538 }
1539
1540 /* If the base of the object is not invariant in the loop nest, we cannot
1541 analyze it. TODO -- in fact, it would suffice to record that there may
1542 be arbitrary dependences in the loops where the base object varies. */
1543 if (loop_nest.exists ()
1544 && !object_address_invariant_in_loop_p (loop_nest[0],
1545 DR_BASE_OBJECT (a)))
1546 {
1547 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1548 return res;
1549 }
1550
1551 /* If the number of dimensions of the access to not agree we can have
1552 a pointer access to a component of the array element type and an
1553 array access while the base-objects are still the same. Punt. */
1554 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1555 {
1556 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1557 return res;
1558 }
1559
1560 DDR_AFFINE_P (res) = true;
1561 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1562 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1563 DDR_LOOP_NEST (res) = loop_nest;
1564 DDR_INNER_LOOP (res) = 0;
1565 DDR_SELF_REFERENCE (res) = false;
1566
1567 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1568 {
1569 struct subscript *subscript;
1570
1571 subscript = XNEW (struct subscript);
1572 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1573 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1574 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1575 SUB_DISTANCE (subscript) = chrec_dont_know;
1576 DDR_SUBSCRIPTS (res).safe_push (subscript);
1577 }
1578
1579 return res;
1580 }
1581
1582 /* Frees memory used by the conflict function F. */
1583
1584 static void
1585 free_conflict_function (conflict_function *f)
1586 {
1587 unsigned i;
1588
1589 if (CF_NONTRIVIAL_P (f))
1590 {
1591 for (i = 0; i < f->n; i++)
1592 affine_fn_free (f->fns[i]);
1593 }
1594 free (f);
1595 }
1596
1597 /* Frees memory used by SUBSCRIPTS. */
1598
1599 static void
1600 free_subscripts (vec<subscript_p> subscripts)
1601 {
1602 unsigned i;
1603 subscript_p s;
1604
1605 FOR_EACH_VEC_ELT (subscripts, i, s)
1606 {
1607 free_conflict_function (s->conflicting_iterations_in_a);
1608 free_conflict_function (s->conflicting_iterations_in_b);
1609 free (s);
1610 }
1611 subscripts.release ();
1612 }
1613
1614 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1615 description. */
1616
1617 static inline void
1618 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1619 tree chrec)
1620 {
1621 DDR_ARE_DEPENDENT (ddr) = chrec;
1622 free_subscripts (DDR_SUBSCRIPTS (ddr));
1623 DDR_SUBSCRIPTS (ddr).create (0);
1624 }
1625
1626 /* The dependence relation DDR cannot be represented by a distance
1627 vector. */
1628
1629 static inline void
1630 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1631 {
1632 if (dump_file && (dump_flags & TDF_DETAILS))
1633 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1634
1635 DDR_AFFINE_P (ddr) = false;
1636 }
1637
1638 \f
1639
1640 /* This section contains the classic Banerjee tests. */
1641
1642 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1643 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1644
1645 static inline bool
1646 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1647 {
1648 return (evolution_function_is_constant_p (chrec_a)
1649 && evolution_function_is_constant_p (chrec_b));
1650 }
1651
1652 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1653 variable, i.e., if the SIV (Single Index Variable) test is true. */
1654
1655 static bool
1656 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1657 {
1658 if ((evolution_function_is_constant_p (chrec_a)
1659 && evolution_function_is_univariate_p (chrec_b))
1660 || (evolution_function_is_constant_p (chrec_b)
1661 && evolution_function_is_univariate_p (chrec_a)))
1662 return true;
1663
1664 if (evolution_function_is_univariate_p (chrec_a)
1665 && evolution_function_is_univariate_p (chrec_b))
1666 {
1667 switch (TREE_CODE (chrec_a))
1668 {
1669 case POLYNOMIAL_CHREC:
1670 switch (TREE_CODE (chrec_b))
1671 {
1672 case POLYNOMIAL_CHREC:
1673 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1674 return false;
1675
1676 default:
1677 return true;
1678 }
1679
1680 default:
1681 return true;
1682 }
1683 }
1684
1685 return false;
1686 }
1687
1688 /* Creates a conflict function with N dimensions. The affine functions
1689 in each dimension follow. */
1690
1691 static conflict_function *
1692 conflict_fn (unsigned n, ...)
1693 {
1694 unsigned i;
1695 conflict_function *ret = XCNEW (conflict_function);
1696 va_list ap;
1697
1698 gcc_assert (0 < n && n <= MAX_DIM);
1699 va_start (ap, n);
1700
1701 ret->n = n;
1702 for (i = 0; i < n; i++)
1703 ret->fns[i] = va_arg (ap, affine_fn);
1704 va_end (ap);
1705
1706 return ret;
1707 }
1708
1709 /* Returns constant affine function with value CST. */
1710
1711 static affine_fn
1712 affine_fn_cst (tree cst)
1713 {
1714 affine_fn fn;
1715 fn.create (1);
1716 fn.quick_push (cst);
1717 return fn;
1718 }
1719
1720 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1721
1722 static affine_fn
1723 affine_fn_univar (tree cst, unsigned dim, tree coef)
1724 {
1725 affine_fn fn;
1726 fn.create (dim + 1);
1727 unsigned i;
1728
1729 gcc_assert (dim > 0);
1730 fn.quick_push (cst);
1731 for (i = 1; i < dim; i++)
1732 fn.quick_push (integer_zero_node);
1733 fn.quick_push (coef);
1734 return fn;
1735 }
1736
1737 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1738 *OVERLAPS_B are initialized to the functions that describe the
1739 relation between the elements accessed twice by CHREC_A and
1740 CHREC_B. For k >= 0, the following property is verified:
1741
1742 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1743
1744 static void
1745 analyze_ziv_subscript (tree chrec_a,
1746 tree chrec_b,
1747 conflict_function **overlaps_a,
1748 conflict_function **overlaps_b,
1749 tree *last_conflicts)
1750 {
1751 tree type, difference;
1752 dependence_stats.num_ziv++;
1753
1754 if (dump_file && (dump_flags & TDF_DETAILS))
1755 fprintf (dump_file, "(analyze_ziv_subscript \n");
1756
1757 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1758 chrec_a = chrec_convert (type, chrec_a, NULL);
1759 chrec_b = chrec_convert (type, chrec_b, NULL);
1760 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1761
1762 switch (TREE_CODE (difference))
1763 {
1764 case INTEGER_CST:
1765 if (integer_zerop (difference))
1766 {
1767 /* The difference is equal to zero: the accessed index
1768 overlaps for each iteration in the loop. */
1769 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1770 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1771 *last_conflicts = chrec_dont_know;
1772 dependence_stats.num_ziv_dependent++;
1773 }
1774 else
1775 {
1776 /* The accesses do not overlap. */
1777 *overlaps_a = conflict_fn_no_dependence ();
1778 *overlaps_b = conflict_fn_no_dependence ();
1779 *last_conflicts = integer_zero_node;
1780 dependence_stats.num_ziv_independent++;
1781 }
1782 break;
1783
1784 default:
1785 /* We're not sure whether the indexes overlap. For the moment,
1786 conservatively answer "don't know". */
1787 if (dump_file && (dump_flags & TDF_DETAILS))
1788 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1789
1790 *overlaps_a = conflict_fn_not_known ();
1791 *overlaps_b = conflict_fn_not_known ();
1792 *last_conflicts = chrec_dont_know;
1793 dependence_stats.num_ziv_unimplemented++;
1794 break;
1795 }
1796
1797 if (dump_file && (dump_flags & TDF_DETAILS))
1798 fprintf (dump_file, ")\n");
1799 }
1800
1801 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1802 and only if it fits to the int type. If this is not the case, or the
1803 bound on the number of iterations of LOOP could not be derived, returns
1804 chrec_dont_know. */
1805
1806 static tree
1807 max_stmt_executions_tree (struct loop *loop)
1808 {
1809 widest_int nit;
1810
1811 if (!max_stmt_executions (loop, &nit))
1812 return chrec_dont_know;
1813
1814 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
1815 return chrec_dont_know;
1816
1817 return wide_int_to_tree (unsigned_type_node, nit);
1818 }
1819
1820 /* Determine whether the CHREC is always positive/negative. If the expression
1821 cannot be statically analyzed, return false, otherwise set the answer into
1822 VALUE. */
1823
1824 static bool
1825 chrec_is_positive (tree chrec, bool *value)
1826 {
1827 bool value0, value1, value2;
1828 tree end_value, nb_iter;
1829
1830 switch (TREE_CODE (chrec))
1831 {
1832 case POLYNOMIAL_CHREC:
1833 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1834 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1835 return false;
1836
1837 /* FIXME -- overflows. */
1838 if (value0 == value1)
1839 {
1840 *value = value0;
1841 return true;
1842 }
1843
1844 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1845 and the proof consists in showing that the sign never
1846 changes during the execution of the loop, from 0 to
1847 loop->nb_iterations. */
1848 if (!evolution_function_is_affine_p (chrec))
1849 return false;
1850
1851 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1852 if (chrec_contains_undetermined (nb_iter))
1853 return false;
1854
1855 #if 0
1856 /* TODO -- If the test is after the exit, we may decrease the number of
1857 iterations by one. */
1858 if (after_exit)
1859 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1860 #endif
1861
1862 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1863
1864 if (!chrec_is_positive (end_value, &value2))
1865 return false;
1866
1867 *value = value0;
1868 return value0 == value1;
1869
1870 case INTEGER_CST:
1871 switch (tree_int_cst_sgn (chrec))
1872 {
1873 case -1:
1874 *value = false;
1875 break;
1876 case 1:
1877 *value = true;
1878 break;
1879 default:
1880 return false;
1881 }
1882 return true;
1883
1884 default:
1885 return false;
1886 }
1887 }
1888
1889
1890 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1891 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1892 *OVERLAPS_B are initialized to the functions that describe the
1893 relation between the elements accessed twice by CHREC_A and
1894 CHREC_B. For k >= 0, the following property is verified:
1895
1896 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1897
1898 static void
1899 analyze_siv_subscript_cst_affine (tree chrec_a,
1900 tree chrec_b,
1901 conflict_function **overlaps_a,
1902 conflict_function **overlaps_b,
1903 tree *last_conflicts)
1904 {
1905 bool value0, value1, value2;
1906 tree type, difference, tmp;
1907
1908 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1909 chrec_a = chrec_convert (type, chrec_a, NULL);
1910 chrec_b = chrec_convert (type, chrec_b, NULL);
1911 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1912
1913 /* Special case overlap in the first iteration. */
1914 if (integer_zerop (difference))
1915 {
1916 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1917 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1918 *last_conflicts = integer_one_node;
1919 return;
1920 }
1921
1922 if (!chrec_is_positive (initial_condition (difference), &value0))
1923 {
1924 if (dump_file && (dump_flags & TDF_DETAILS))
1925 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1926
1927 dependence_stats.num_siv_unimplemented++;
1928 *overlaps_a = conflict_fn_not_known ();
1929 *overlaps_b = conflict_fn_not_known ();
1930 *last_conflicts = chrec_dont_know;
1931 return;
1932 }
1933 else
1934 {
1935 if (value0 == false)
1936 {
1937 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1938 {
1939 if (dump_file && (dump_flags & TDF_DETAILS))
1940 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1941
1942 *overlaps_a = conflict_fn_not_known ();
1943 *overlaps_b = conflict_fn_not_known ();
1944 *last_conflicts = chrec_dont_know;
1945 dependence_stats.num_siv_unimplemented++;
1946 return;
1947 }
1948 else
1949 {
1950 if (value1 == true)
1951 {
1952 /* Example:
1953 chrec_a = 12
1954 chrec_b = {10, +, 1}
1955 */
1956
1957 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1958 {
1959 HOST_WIDE_INT numiter;
1960 struct loop *loop = get_chrec_loop (chrec_b);
1961
1962 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1963 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1964 fold_build1 (ABS_EXPR, type, difference),
1965 CHREC_RIGHT (chrec_b));
1966 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1967 *last_conflicts = integer_one_node;
1968
1969
1970 /* Perform weak-zero siv test to see if overlap is
1971 outside the loop bounds. */
1972 numiter = max_stmt_executions_int (loop);
1973
1974 if (numiter >= 0
1975 && compare_tree_int (tmp, numiter) > 0)
1976 {
1977 free_conflict_function (*overlaps_a);
1978 free_conflict_function (*overlaps_b);
1979 *overlaps_a = conflict_fn_no_dependence ();
1980 *overlaps_b = conflict_fn_no_dependence ();
1981 *last_conflicts = integer_zero_node;
1982 dependence_stats.num_siv_independent++;
1983 return;
1984 }
1985 dependence_stats.num_siv_dependent++;
1986 return;
1987 }
1988
1989 /* When the step does not divide the difference, there are
1990 no overlaps. */
1991 else
1992 {
1993 *overlaps_a = conflict_fn_no_dependence ();
1994 *overlaps_b = conflict_fn_no_dependence ();
1995 *last_conflicts = integer_zero_node;
1996 dependence_stats.num_siv_independent++;
1997 return;
1998 }
1999 }
2000
2001 else
2002 {
2003 /* Example:
2004 chrec_a = 12
2005 chrec_b = {10, +, -1}
2006
2007 In this case, chrec_a will not overlap with chrec_b. */
2008 *overlaps_a = conflict_fn_no_dependence ();
2009 *overlaps_b = conflict_fn_no_dependence ();
2010 *last_conflicts = integer_zero_node;
2011 dependence_stats.num_siv_independent++;
2012 return;
2013 }
2014 }
2015 }
2016 else
2017 {
2018 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
2019 {
2020 if (dump_file && (dump_flags & TDF_DETAILS))
2021 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2022
2023 *overlaps_a = conflict_fn_not_known ();
2024 *overlaps_b = conflict_fn_not_known ();
2025 *last_conflicts = chrec_dont_know;
2026 dependence_stats.num_siv_unimplemented++;
2027 return;
2028 }
2029 else
2030 {
2031 if (value2 == false)
2032 {
2033 /* Example:
2034 chrec_a = 3
2035 chrec_b = {10, +, -1}
2036 */
2037 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
2038 {
2039 HOST_WIDE_INT numiter;
2040 struct loop *loop = get_chrec_loop (chrec_b);
2041
2042 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2043 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
2044 CHREC_RIGHT (chrec_b));
2045 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
2046 *last_conflicts = integer_one_node;
2047
2048 /* Perform weak-zero siv test to see if overlap is
2049 outside the loop bounds. */
2050 numiter = max_stmt_executions_int (loop);
2051
2052 if (numiter >= 0
2053 && compare_tree_int (tmp, numiter) > 0)
2054 {
2055 free_conflict_function (*overlaps_a);
2056 free_conflict_function (*overlaps_b);
2057 *overlaps_a = conflict_fn_no_dependence ();
2058 *overlaps_b = conflict_fn_no_dependence ();
2059 *last_conflicts = integer_zero_node;
2060 dependence_stats.num_siv_independent++;
2061 return;
2062 }
2063 dependence_stats.num_siv_dependent++;
2064 return;
2065 }
2066
2067 /* When the step does not divide the difference, there
2068 are no overlaps. */
2069 else
2070 {
2071 *overlaps_a = conflict_fn_no_dependence ();
2072 *overlaps_b = conflict_fn_no_dependence ();
2073 *last_conflicts = integer_zero_node;
2074 dependence_stats.num_siv_independent++;
2075 return;
2076 }
2077 }
2078 else
2079 {
2080 /* Example:
2081 chrec_a = 3
2082 chrec_b = {4, +, 1}
2083
2084 In this case, chrec_a will not overlap with chrec_b. */
2085 *overlaps_a = conflict_fn_no_dependence ();
2086 *overlaps_b = conflict_fn_no_dependence ();
2087 *last_conflicts = integer_zero_node;
2088 dependence_stats.num_siv_independent++;
2089 return;
2090 }
2091 }
2092 }
2093 }
2094 }
2095
2096 /* Helper recursive function for initializing the matrix A. Returns
2097 the initial value of CHREC. */
2098
2099 static tree
2100 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2101 {
2102 gcc_assert (chrec);
2103
2104 switch (TREE_CODE (chrec))
2105 {
2106 case POLYNOMIAL_CHREC:
2107 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2108
2109 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2110 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2111
2112 case PLUS_EXPR:
2113 case MULT_EXPR:
2114 case MINUS_EXPR:
2115 {
2116 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2117 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2118
2119 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2120 }
2121
2122 CASE_CONVERT:
2123 {
2124 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2125 return chrec_convert (chrec_type (chrec), op, NULL);
2126 }
2127
2128 case BIT_NOT_EXPR:
2129 {
2130 /* Handle ~X as -1 - X. */
2131 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2132 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2133 build_int_cst (TREE_TYPE (chrec), -1), op);
2134 }
2135
2136 case INTEGER_CST:
2137 return chrec;
2138
2139 default:
2140 gcc_unreachable ();
2141 return NULL_TREE;
2142 }
2143 }
2144
2145 #define FLOOR_DIV(x,y) ((x) / (y))
2146
2147 /* Solves the special case of the Diophantine equation:
2148 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2149
2150 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2151 number of iterations that loops X and Y run. The overlaps will be
2152 constructed as evolutions in dimension DIM. */
2153
2154 static void
2155 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2156 affine_fn *overlaps_a,
2157 affine_fn *overlaps_b,
2158 tree *last_conflicts, int dim)
2159 {
2160 if (((step_a > 0 && step_b > 0)
2161 || (step_a < 0 && step_b < 0)))
2162 {
2163 int step_overlaps_a, step_overlaps_b;
2164 int gcd_steps_a_b, last_conflict, tau2;
2165
2166 gcd_steps_a_b = gcd (step_a, step_b);
2167 step_overlaps_a = step_b / gcd_steps_a_b;
2168 step_overlaps_b = step_a / gcd_steps_a_b;
2169
2170 if (niter > 0)
2171 {
2172 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2173 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2174 last_conflict = tau2;
2175 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2176 }
2177 else
2178 *last_conflicts = chrec_dont_know;
2179
2180 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2181 build_int_cst (NULL_TREE,
2182 step_overlaps_a));
2183 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2184 build_int_cst (NULL_TREE,
2185 step_overlaps_b));
2186 }
2187
2188 else
2189 {
2190 *overlaps_a = affine_fn_cst (integer_zero_node);
2191 *overlaps_b = affine_fn_cst (integer_zero_node);
2192 *last_conflicts = integer_zero_node;
2193 }
2194 }
2195
2196 /* Solves the special case of a Diophantine equation where CHREC_A is
2197 an affine bivariate function, and CHREC_B is an affine univariate
2198 function. For example,
2199
2200 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2201
2202 has the following overlapping functions:
2203
2204 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2205 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2206 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2207
2208 FORNOW: This is a specialized implementation for a case occurring in
2209 a common benchmark. Implement the general algorithm. */
2210
2211 static void
2212 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2213 conflict_function **overlaps_a,
2214 conflict_function **overlaps_b,
2215 tree *last_conflicts)
2216 {
2217 bool xz_p, yz_p, xyz_p;
2218 int step_x, step_y, step_z;
2219 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2220 affine_fn overlaps_a_xz, overlaps_b_xz;
2221 affine_fn overlaps_a_yz, overlaps_b_yz;
2222 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2223 affine_fn ova1, ova2, ovb;
2224 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2225
2226 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2227 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2228 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2229
2230 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2231 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2232 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2233
2234 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2235 {
2236 if (dump_file && (dump_flags & TDF_DETAILS))
2237 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2238
2239 *overlaps_a = conflict_fn_not_known ();
2240 *overlaps_b = conflict_fn_not_known ();
2241 *last_conflicts = chrec_dont_know;
2242 return;
2243 }
2244
2245 niter = MIN (niter_x, niter_z);
2246 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2247 &overlaps_a_xz,
2248 &overlaps_b_xz,
2249 &last_conflicts_xz, 1);
2250 niter = MIN (niter_y, niter_z);
2251 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2252 &overlaps_a_yz,
2253 &overlaps_b_yz,
2254 &last_conflicts_yz, 2);
2255 niter = MIN (niter_x, niter_z);
2256 niter = MIN (niter_y, niter);
2257 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2258 &overlaps_a_xyz,
2259 &overlaps_b_xyz,
2260 &last_conflicts_xyz, 3);
2261
2262 xz_p = !integer_zerop (last_conflicts_xz);
2263 yz_p = !integer_zerop (last_conflicts_yz);
2264 xyz_p = !integer_zerop (last_conflicts_xyz);
2265
2266 if (xz_p || yz_p || xyz_p)
2267 {
2268 ova1 = affine_fn_cst (integer_zero_node);
2269 ova2 = affine_fn_cst (integer_zero_node);
2270 ovb = affine_fn_cst (integer_zero_node);
2271 if (xz_p)
2272 {
2273 affine_fn t0 = ova1;
2274 affine_fn t2 = ovb;
2275
2276 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2277 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2278 affine_fn_free (t0);
2279 affine_fn_free (t2);
2280 *last_conflicts = last_conflicts_xz;
2281 }
2282 if (yz_p)
2283 {
2284 affine_fn t0 = ova2;
2285 affine_fn t2 = ovb;
2286
2287 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2288 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2289 affine_fn_free (t0);
2290 affine_fn_free (t2);
2291 *last_conflicts = last_conflicts_yz;
2292 }
2293 if (xyz_p)
2294 {
2295 affine_fn t0 = ova1;
2296 affine_fn t2 = ova2;
2297 affine_fn t4 = ovb;
2298
2299 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2300 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2301 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2302 affine_fn_free (t0);
2303 affine_fn_free (t2);
2304 affine_fn_free (t4);
2305 *last_conflicts = last_conflicts_xyz;
2306 }
2307 *overlaps_a = conflict_fn (2, ova1, ova2);
2308 *overlaps_b = conflict_fn (1, ovb);
2309 }
2310 else
2311 {
2312 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2313 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2314 *last_conflicts = integer_zero_node;
2315 }
2316
2317 affine_fn_free (overlaps_a_xz);
2318 affine_fn_free (overlaps_b_xz);
2319 affine_fn_free (overlaps_a_yz);
2320 affine_fn_free (overlaps_b_yz);
2321 affine_fn_free (overlaps_a_xyz);
2322 affine_fn_free (overlaps_b_xyz);
2323 }
2324
2325 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2326
2327 static void
2328 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2329 int size)
2330 {
2331 memcpy (vec2, vec1, size * sizeof (*vec1));
2332 }
2333
2334 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2335
2336 static void
2337 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2338 int m, int n)
2339 {
2340 int i;
2341
2342 for (i = 0; i < m; i++)
2343 lambda_vector_copy (mat1[i], mat2[i], n);
2344 }
2345
2346 /* Store the N x N identity matrix in MAT. */
2347
2348 static void
2349 lambda_matrix_id (lambda_matrix mat, int size)
2350 {
2351 int i, j;
2352
2353 for (i = 0; i < size; i++)
2354 for (j = 0; j < size; j++)
2355 mat[i][j] = (i == j) ? 1 : 0;
2356 }
2357
2358 /* Return the first nonzero element of vector VEC1 between START and N.
2359 We must have START <= N. Returns N if VEC1 is the zero vector. */
2360
2361 static int
2362 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2363 {
2364 int j = start;
2365 while (j < n && vec1[j] == 0)
2366 j++;
2367 return j;
2368 }
2369
2370 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2371 R2 = R2 + CONST1 * R1. */
2372
2373 static void
2374 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2375 {
2376 int i;
2377
2378 if (const1 == 0)
2379 return;
2380
2381 for (i = 0; i < n; i++)
2382 mat[r2][i] += const1 * mat[r1][i];
2383 }
2384
2385 /* Swap rows R1 and R2 in matrix MAT. */
2386
2387 static void
2388 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2389 {
2390 lambda_vector row;
2391
2392 row = mat[r1];
2393 mat[r1] = mat[r2];
2394 mat[r2] = row;
2395 }
2396
2397 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2398 and store the result in VEC2. */
2399
2400 static void
2401 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2402 int size, int const1)
2403 {
2404 int i;
2405
2406 if (const1 == 0)
2407 lambda_vector_clear (vec2, size);
2408 else
2409 for (i = 0; i < size; i++)
2410 vec2[i] = const1 * vec1[i];
2411 }
2412
2413 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2414
2415 static void
2416 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2417 int size)
2418 {
2419 lambda_vector_mult_const (vec1, vec2, size, -1);
2420 }
2421
2422 /* Negate row R1 of matrix MAT which has N columns. */
2423
2424 static void
2425 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2426 {
2427 lambda_vector_negate (mat[r1], mat[r1], n);
2428 }
2429
2430 /* Return true if two vectors are equal. */
2431
2432 static bool
2433 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2434 {
2435 int i;
2436 for (i = 0; i < size; i++)
2437 if (vec1[i] != vec2[i])
2438 return false;
2439 return true;
2440 }
2441
2442 /* Given an M x N integer matrix A, this function determines an M x
2443 M unimodular matrix U, and an M x N echelon matrix S such that
2444 "U.A = S". This decomposition is also known as "right Hermite".
2445
2446 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2447 Restructuring Compilers" Utpal Banerjee. */
2448
2449 static void
2450 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2451 lambda_matrix S, lambda_matrix U)
2452 {
2453 int i, j, i0 = 0;
2454
2455 lambda_matrix_copy (A, S, m, n);
2456 lambda_matrix_id (U, m);
2457
2458 for (j = 0; j < n; j++)
2459 {
2460 if (lambda_vector_first_nz (S[j], m, i0) < m)
2461 {
2462 ++i0;
2463 for (i = m - 1; i >= i0; i--)
2464 {
2465 while (S[i][j] != 0)
2466 {
2467 int sigma, factor, a, b;
2468
2469 a = S[i-1][j];
2470 b = S[i][j];
2471 sigma = (a * b < 0) ? -1: 1;
2472 a = abs (a);
2473 b = abs (b);
2474 factor = sigma * (a / b);
2475
2476 lambda_matrix_row_add (S, n, i, i-1, -factor);
2477 lambda_matrix_row_exchange (S, i, i-1);
2478
2479 lambda_matrix_row_add (U, m, i, i-1, -factor);
2480 lambda_matrix_row_exchange (U, i, i-1);
2481 }
2482 }
2483 }
2484 }
2485 }
2486
2487 /* Determines the overlapping elements due to accesses CHREC_A and
2488 CHREC_B, that are affine functions. This function cannot handle
2489 symbolic evolution functions, ie. when initial conditions are
2490 parameters, because it uses lambda matrices of integers. */
2491
2492 static void
2493 analyze_subscript_affine_affine (tree chrec_a,
2494 tree chrec_b,
2495 conflict_function **overlaps_a,
2496 conflict_function **overlaps_b,
2497 tree *last_conflicts)
2498 {
2499 unsigned nb_vars_a, nb_vars_b, dim;
2500 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2501 lambda_matrix A, U, S;
2502 struct obstack scratch_obstack;
2503
2504 if (eq_evolutions_p (chrec_a, chrec_b))
2505 {
2506 /* The accessed index overlaps for each iteration in the
2507 loop. */
2508 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2509 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2510 *last_conflicts = chrec_dont_know;
2511 return;
2512 }
2513 if (dump_file && (dump_flags & TDF_DETAILS))
2514 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2515
2516 /* For determining the initial intersection, we have to solve a
2517 Diophantine equation. This is the most time consuming part.
2518
2519 For answering to the question: "Is there a dependence?" we have
2520 to prove that there exists a solution to the Diophantine
2521 equation, and that the solution is in the iteration domain,
2522 i.e. the solution is positive or zero, and that the solution
2523 happens before the upper bound loop.nb_iterations. Otherwise
2524 there is no dependence. This function outputs a description of
2525 the iterations that hold the intersections. */
2526
2527 nb_vars_a = nb_vars_in_chrec (chrec_a);
2528 nb_vars_b = nb_vars_in_chrec (chrec_b);
2529
2530 gcc_obstack_init (&scratch_obstack);
2531
2532 dim = nb_vars_a + nb_vars_b;
2533 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2534 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2535 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2536
2537 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2538 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2539 gamma = init_b - init_a;
2540
2541 /* Don't do all the hard work of solving the Diophantine equation
2542 when we already know the solution: for example,
2543 | {3, +, 1}_1
2544 | {3, +, 4}_2
2545 | gamma = 3 - 3 = 0.
2546 Then the first overlap occurs during the first iterations:
2547 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2548 */
2549 if (gamma == 0)
2550 {
2551 if (nb_vars_a == 1 && nb_vars_b == 1)
2552 {
2553 HOST_WIDE_INT step_a, step_b;
2554 HOST_WIDE_INT niter, niter_a, niter_b;
2555 affine_fn ova, ovb;
2556
2557 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2558 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2559 niter = MIN (niter_a, niter_b);
2560 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2561 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2562
2563 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2564 &ova, &ovb,
2565 last_conflicts, 1);
2566 *overlaps_a = conflict_fn (1, ova);
2567 *overlaps_b = conflict_fn (1, ovb);
2568 }
2569
2570 else if (nb_vars_a == 2 && nb_vars_b == 1)
2571 compute_overlap_steps_for_affine_1_2
2572 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2573
2574 else if (nb_vars_a == 1 && nb_vars_b == 2)
2575 compute_overlap_steps_for_affine_1_2
2576 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2577
2578 else
2579 {
2580 if (dump_file && (dump_flags & TDF_DETAILS))
2581 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2582 *overlaps_a = conflict_fn_not_known ();
2583 *overlaps_b = conflict_fn_not_known ();
2584 *last_conflicts = chrec_dont_know;
2585 }
2586 goto end_analyze_subs_aa;
2587 }
2588
2589 /* U.A = S */
2590 lambda_matrix_right_hermite (A, dim, 1, S, U);
2591
2592 if (S[0][0] < 0)
2593 {
2594 S[0][0] *= -1;
2595 lambda_matrix_row_negate (U, dim, 0);
2596 }
2597 gcd_alpha_beta = S[0][0];
2598
2599 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2600 but that is a quite strange case. Instead of ICEing, answer
2601 don't know. */
2602 if (gcd_alpha_beta == 0)
2603 {
2604 *overlaps_a = conflict_fn_not_known ();
2605 *overlaps_b = conflict_fn_not_known ();
2606 *last_conflicts = chrec_dont_know;
2607 goto end_analyze_subs_aa;
2608 }
2609
2610 /* The classic "gcd-test". */
2611 if (!int_divides_p (gcd_alpha_beta, gamma))
2612 {
2613 /* The "gcd-test" has determined that there is no integer
2614 solution, i.e. there is no dependence. */
2615 *overlaps_a = conflict_fn_no_dependence ();
2616 *overlaps_b = conflict_fn_no_dependence ();
2617 *last_conflicts = integer_zero_node;
2618 }
2619
2620 /* Both access functions are univariate. This includes SIV and MIV cases. */
2621 else if (nb_vars_a == 1 && nb_vars_b == 1)
2622 {
2623 /* Both functions should have the same evolution sign. */
2624 if (((A[0][0] > 0 && -A[1][0] > 0)
2625 || (A[0][0] < 0 && -A[1][0] < 0)))
2626 {
2627 /* The solutions are given by:
2628 |
2629 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2630 | [u21 u22] [y0]
2631
2632 For a given integer t. Using the following variables,
2633
2634 | i0 = u11 * gamma / gcd_alpha_beta
2635 | j0 = u12 * gamma / gcd_alpha_beta
2636 | i1 = u21
2637 | j1 = u22
2638
2639 the solutions are:
2640
2641 | x0 = i0 + i1 * t,
2642 | y0 = j0 + j1 * t. */
2643 HOST_WIDE_INT i0, j0, i1, j1;
2644
2645 i0 = U[0][0] * gamma / gcd_alpha_beta;
2646 j0 = U[0][1] * gamma / gcd_alpha_beta;
2647 i1 = U[1][0];
2648 j1 = U[1][1];
2649
2650 if ((i1 == 0 && i0 < 0)
2651 || (j1 == 0 && j0 < 0))
2652 {
2653 /* There is no solution.
2654 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2655 falls in here, but for the moment we don't look at the
2656 upper bound of the iteration domain. */
2657 *overlaps_a = conflict_fn_no_dependence ();
2658 *overlaps_b = conflict_fn_no_dependence ();
2659 *last_conflicts = integer_zero_node;
2660 goto end_analyze_subs_aa;
2661 }
2662
2663 if (i1 > 0 && j1 > 0)
2664 {
2665 HOST_WIDE_INT niter_a
2666 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2667 HOST_WIDE_INT niter_b
2668 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2669 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2670
2671 /* (X0, Y0) is a solution of the Diophantine equation:
2672 "chrec_a (X0) = chrec_b (Y0)". */
2673 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2674 CEIL (-j0, j1));
2675 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2676 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2677
2678 /* (X1, Y1) is the smallest positive solution of the eq
2679 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2680 first conflict occurs. */
2681 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2682 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2683 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2684
2685 if (niter > 0)
2686 {
2687 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2688 FLOOR_DIV (niter - j0, j1));
2689 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2690
2691 /* If the overlap occurs outside of the bounds of the
2692 loop, there is no dependence. */
2693 if (x1 >= niter || y1 >= niter)
2694 {
2695 *overlaps_a = conflict_fn_no_dependence ();
2696 *overlaps_b = conflict_fn_no_dependence ();
2697 *last_conflicts = integer_zero_node;
2698 goto end_analyze_subs_aa;
2699 }
2700 else
2701 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2702 }
2703 else
2704 *last_conflicts = chrec_dont_know;
2705
2706 *overlaps_a
2707 = conflict_fn (1,
2708 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2709 1,
2710 build_int_cst (NULL_TREE, i1)));
2711 *overlaps_b
2712 = conflict_fn (1,
2713 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2714 1,
2715 build_int_cst (NULL_TREE, j1)));
2716 }
2717 else
2718 {
2719 /* FIXME: For the moment, the upper bound of the
2720 iteration domain for i and j is not checked. */
2721 if (dump_file && (dump_flags & TDF_DETAILS))
2722 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2723 *overlaps_a = conflict_fn_not_known ();
2724 *overlaps_b = conflict_fn_not_known ();
2725 *last_conflicts = chrec_dont_know;
2726 }
2727 }
2728 else
2729 {
2730 if (dump_file && (dump_flags & TDF_DETAILS))
2731 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2732 *overlaps_a = conflict_fn_not_known ();
2733 *overlaps_b = conflict_fn_not_known ();
2734 *last_conflicts = chrec_dont_know;
2735 }
2736 }
2737 else
2738 {
2739 if (dump_file && (dump_flags & TDF_DETAILS))
2740 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2741 *overlaps_a = conflict_fn_not_known ();
2742 *overlaps_b = conflict_fn_not_known ();
2743 *last_conflicts = chrec_dont_know;
2744 }
2745
2746 end_analyze_subs_aa:
2747 obstack_free (&scratch_obstack, NULL);
2748 if (dump_file && (dump_flags & TDF_DETAILS))
2749 {
2750 fprintf (dump_file, " (overlaps_a = ");
2751 dump_conflict_function (dump_file, *overlaps_a);
2752 fprintf (dump_file, ")\n (overlaps_b = ");
2753 dump_conflict_function (dump_file, *overlaps_b);
2754 fprintf (dump_file, "))\n");
2755 }
2756 }
2757
2758 /* Returns true when analyze_subscript_affine_affine can be used for
2759 determining the dependence relation between chrec_a and chrec_b,
2760 that contain symbols. This function modifies chrec_a and chrec_b
2761 such that the analysis result is the same, and such that they don't
2762 contain symbols, and then can safely be passed to the analyzer.
2763
2764 Example: The analysis of the following tuples of evolutions produce
2765 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2766 vs. {0, +, 1}_1
2767
2768 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2769 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2770 */
2771
2772 static bool
2773 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2774 {
2775 tree diff, type, left_a, left_b, right_b;
2776
2777 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2778 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2779 /* FIXME: For the moment not handled. Might be refined later. */
2780 return false;
2781
2782 type = chrec_type (*chrec_a);
2783 left_a = CHREC_LEFT (*chrec_a);
2784 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2785 diff = chrec_fold_minus (type, left_a, left_b);
2786
2787 if (!evolution_function_is_constant_p (diff))
2788 return false;
2789
2790 if (dump_file && (dump_flags & TDF_DETAILS))
2791 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2792
2793 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2794 diff, CHREC_RIGHT (*chrec_a));
2795 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2796 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2797 build_int_cst (type, 0),
2798 right_b);
2799 return true;
2800 }
2801
2802 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2803 *OVERLAPS_B are initialized to the functions that describe the
2804 relation between the elements accessed twice by CHREC_A and
2805 CHREC_B. For k >= 0, the following property is verified:
2806
2807 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2808
2809 static void
2810 analyze_siv_subscript (tree chrec_a,
2811 tree chrec_b,
2812 conflict_function **overlaps_a,
2813 conflict_function **overlaps_b,
2814 tree *last_conflicts,
2815 int loop_nest_num)
2816 {
2817 dependence_stats.num_siv++;
2818
2819 if (dump_file && (dump_flags & TDF_DETAILS))
2820 fprintf (dump_file, "(analyze_siv_subscript \n");
2821
2822 if (evolution_function_is_constant_p (chrec_a)
2823 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2824 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2825 overlaps_a, overlaps_b, last_conflicts);
2826
2827 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2828 && evolution_function_is_constant_p (chrec_b))
2829 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2830 overlaps_b, overlaps_a, last_conflicts);
2831
2832 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2833 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2834 {
2835 if (!chrec_contains_symbols (chrec_a)
2836 && !chrec_contains_symbols (chrec_b))
2837 {
2838 analyze_subscript_affine_affine (chrec_a, chrec_b,
2839 overlaps_a, overlaps_b,
2840 last_conflicts);
2841
2842 if (CF_NOT_KNOWN_P (*overlaps_a)
2843 || CF_NOT_KNOWN_P (*overlaps_b))
2844 dependence_stats.num_siv_unimplemented++;
2845 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2846 || CF_NO_DEPENDENCE_P (*overlaps_b))
2847 dependence_stats.num_siv_independent++;
2848 else
2849 dependence_stats.num_siv_dependent++;
2850 }
2851 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2852 &chrec_b))
2853 {
2854 analyze_subscript_affine_affine (chrec_a, chrec_b,
2855 overlaps_a, overlaps_b,
2856 last_conflicts);
2857
2858 if (CF_NOT_KNOWN_P (*overlaps_a)
2859 || CF_NOT_KNOWN_P (*overlaps_b))
2860 dependence_stats.num_siv_unimplemented++;
2861 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2862 || CF_NO_DEPENDENCE_P (*overlaps_b))
2863 dependence_stats.num_siv_independent++;
2864 else
2865 dependence_stats.num_siv_dependent++;
2866 }
2867 else
2868 goto siv_subscript_dontknow;
2869 }
2870
2871 else
2872 {
2873 siv_subscript_dontknow:;
2874 if (dump_file && (dump_flags & TDF_DETAILS))
2875 fprintf (dump_file, " siv test failed: unimplemented");
2876 *overlaps_a = conflict_fn_not_known ();
2877 *overlaps_b = conflict_fn_not_known ();
2878 *last_conflicts = chrec_dont_know;
2879 dependence_stats.num_siv_unimplemented++;
2880 }
2881
2882 if (dump_file && (dump_flags & TDF_DETAILS))
2883 fprintf (dump_file, ")\n");
2884 }
2885
2886 /* Returns false if we can prove that the greatest common divisor of the steps
2887 of CHREC does not divide CST, false otherwise. */
2888
2889 static bool
2890 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2891 {
2892 HOST_WIDE_INT cd = 0, val;
2893 tree step;
2894
2895 if (!tree_fits_shwi_p (cst))
2896 return true;
2897 val = tree_to_shwi (cst);
2898
2899 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2900 {
2901 step = CHREC_RIGHT (chrec);
2902 if (!tree_fits_shwi_p (step))
2903 return true;
2904 cd = gcd (cd, tree_to_shwi (step));
2905 chrec = CHREC_LEFT (chrec);
2906 }
2907
2908 return val % cd == 0;
2909 }
2910
2911 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2912 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2913 functions that describe the relation between the elements accessed
2914 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2915 is verified:
2916
2917 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2918
2919 static void
2920 analyze_miv_subscript (tree chrec_a,
2921 tree chrec_b,
2922 conflict_function **overlaps_a,
2923 conflict_function **overlaps_b,
2924 tree *last_conflicts,
2925 struct loop *loop_nest)
2926 {
2927 tree type, difference;
2928
2929 dependence_stats.num_miv++;
2930 if (dump_file && (dump_flags & TDF_DETAILS))
2931 fprintf (dump_file, "(analyze_miv_subscript \n");
2932
2933 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2934 chrec_a = chrec_convert (type, chrec_a, NULL);
2935 chrec_b = chrec_convert (type, chrec_b, NULL);
2936 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2937
2938 if (eq_evolutions_p (chrec_a, chrec_b))
2939 {
2940 /* Access functions are the same: all the elements are accessed
2941 in the same order. */
2942 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2943 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2944 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2945 dependence_stats.num_miv_dependent++;
2946 }
2947
2948 else if (evolution_function_is_constant_p (difference)
2949 /* For the moment, the following is verified:
2950 evolution_function_is_affine_multivariate_p (chrec_a,
2951 loop_nest->num) */
2952 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2953 {
2954 /* testsuite/.../ssa-chrec-33.c
2955 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2956
2957 The difference is 1, and all the evolution steps are multiples
2958 of 2, consequently there are no overlapping elements. */
2959 *overlaps_a = conflict_fn_no_dependence ();
2960 *overlaps_b = conflict_fn_no_dependence ();
2961 *last_conflicts = integer_zero_node;
2962 dependence_stats.num_miv_independent++;
2963 }
2964
2965 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2966 && !chrec_contains_symbols (chrec_a)
2967 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2968 && !chrec_contains_symbols (chrec_b))
2969 {
2970 /* testsuite/.../ssa-chrec-35.c
2971 {0, +, 1}_2 vs. {0, +, 1}_3
2972 the overlapping elements are respectively located at iterations:
2973 {0, +, 1}_x and {0, +, 1}_x,
2974 in other words, we have the equality:
2975 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2976
2977 Other examples:
2978 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2979 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2980
2981 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2982 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2983 */
2984 analyze_subscript_affine_affine (chrec_a, chrec_b,
2985 overlaps_a, overlaps_b, last_conflicts);
2986
2987 if (CF_NOT_KNOWN_P (*overlaps_a)
2988 || CF_NOT_KNOWN_P (*overlaps_b))
2989 dependence_stats.num_miv_unimplemented++;
2990 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2991 || CF_NO_DEPENDENCE_P (*overlaps_b))
2992 dependence_stats.num_miv_independent++;
2993 else
2994 dependence_stats.num_miv_dependent++;
2995 }
2996
2997 else
2998 {
2999 /* When the analysis is too difficult, answer "don't know". */
3000 if (dump_file && (dump_flags & TDF_DETAILS))
3001 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
3002
3003 *overlaps_a = conflict_fn_not_known ();
3004 *overlaps_b = conflict_fn_not_known ();
3005 *last_conflicts = chrec_dont_know;
3006 dependence_stats.num_miv_unimplemented++;
3007 }
3008
3009 if (dump_file && (dump_flags & TDF_DETAILS))
3010 fprintf (dump_file, ")\n");
3011 }
3012
3013 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
3014 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
3015 OVERLAP_ITERATIONS_B are initialized with two functions that
3016 describe the iterations that contain conflicting elements.
3017
3018 Remark: For an integer k >= 0, the following equality is true:
3019
3020 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3021 */
3022
3023 static void
3024 analyze_overlapping_iterations (tree chrec_a,
3025 tree chrec_b,
3026 conflict_function **overlap_iterations_a,
3027 conflict_function **overlap_iterations_b,
3028 tree *last_conflicts, struct loop *loop_nest)
3029 {
3030 unsigned int lnn = loop_nest->num;
3031
3032 dependence_stats.num_subscript_tests++;
3033
3034 if (dump_file && (dump_flags & TDF_DETAILS))
3035 {
3036 fprintf (dump_file, "(analyze_overlapping_iterations \n");
3037 fprintf (dump_file, " (chrec_a = ");
3038 print_generic_expr (dump_file, chrec_a, 0);
3039 fprintf (dump_file, ")\n (chrec_b = ");
3040 print_generic_expr (dump_file, chrec_b, 0);
3041 fprintf (dump_file, ")\n");
3042 }
3043
3044 if (chrec_a == NULL_TREE
3045 || chrec_b == NULL_TREE
3046 || chrec_contains_undetermined (chrec_a)
3047 || chrec_contains_undetermined (chrec_b))
3048 {
3049 dependence_stats.num_subscript_undetermined++;
3050
3051 *overlap_iterations_a = conflict_fn_not_known ();
3052 *overlap_iterations_b = conflict_fn_not_known ();
3053 }
3054
3055 /* If they are the same chrec, and are affine, they overlap
3056 on every iteration. */
3057 else if (eq_evolutions_p (chrec_a, chrec_b)
3058 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3059 || operand_equal_p (chrec_a, chrec_b, 0)))
3060 {
3061 dependence_stats.num_same_subscript_function++;
3062 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3063 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3064 *last_conflicts = chrec_dont_know;
3065 }
3066
3067 /* If they aren't the same, and aren't affine, we can't do anything
3068 yet. */
3069 else if ((chrec_contains_symbols (chrec_a)
3070 || chrec_contains_symbols (chrec_b))
3071 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3072 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
3073 {
3074 dependence_stats.num_subscript_undetermined++;
3075 *overlap_iterations_a = conflict_fn_not_known ();
3076 *overlap_iterations_b = conflict_fn_not_known ();
3077 }
3078
3079 else if (ziv_subscript_p (chrec_a, chrec_b))
3080 analyze_ziv_subscript (chrec_a, chrec_b,
3081 overlap_iterations_a, overlap_iterations_b,
3082 last_conflicts);
3083
3084 else if (siv_subscript_p (chrec_a, chrec_b))
3085 analyze_siv_subscript (chrec_a, chrec_b,
3086 overlap_iterations_a, overlap_iterations_b,
3087 last_conflicts, lnn);
3088
3089 else
3090 analyze_miv_subscript (chrec_a, chrec_b,
3091 overlap_iterations_a, overlap_iterations_b,
3092 last_conflicts, loop_nest);
3093
3094 if (dump_file && (dump_flags & TDF_DETAILS))
3095 {
3096 fprintf (dump_file, " (overlap_iterations_a = ");
3097 dump_conflict_function (dump_file, *overlap_iterations_a);
3098 fprintf (dump_file, ")\n (overlap_iterations_b = ");
3099 dump_conflict_function (dump_file, *overlap_iterations_b);
3100 fprintf (dump_file, "))\n");
3101 }
3102 }
3103
3104 /* Helper function for uniquely inserting distance vectors. */
3105
3106 static void
3107 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3108 {
3109 unsigned i;
3110 lambda_vector v;
3111
3112 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3113 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3114 return;
3115
3116 DDR_DIST_VECTS (ddr).safe_push (dist_v);
3117 }
3118
3119 /* Helper function for uniquely inserting direction vectors. */
3120
3121 static void
3122 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3123 {
3124 unsigned i;
3125 lambda_vector v;
3126
3127 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3128 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3129 return;
3130
3131 DDR_DIR_VECTS (ddr).safe_push (dir_v);
3132 }
3133
3134 /* Add a distance of 1 on all the loops outer than INDEX. If we
3135 haven't yet determined a distance for this outer loop, push a new
3136 distance vector composed of the previous distance, and a distance
3137 of 1 for this outer loop. Example:
3138
3139 | loop_1
3140 | loop_2
3141 | A[10]
3142 | endloop_2
3143 | endloop_1
3144
3145 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3146 save (0, 1), then we have to save (1, 0). */
3147
3148 static void
3149 add_outer_distances (struct data_dependence_relation *ddr,
3150 lambda_vector dist_v, int index)
3151 {
3152 /* For each outer loop where init_v is not set, the accesses are
3153 in dependence of distance 1 in the loop. */
3154 while (--index >= 0)
3155 {
3156 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3157 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3158 save_v[index] = 1;
3159 save_dist_v (ddr, save_v);
3160 }
3161 }
3162
3163 /* Return false when fail to represent the data dependence as a
3164 distance vector. INIT_B is set to true when a component has been
3165 added to the distance vector DIST_V. INDEX_CARRY is then set to
3166 the index in DIST_V that carries the dependence. */
3167
3168 static bool
3169 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3170 struct data_reference *ddr_a,
3171 struct data_reference *ddr_b,
3172 lambda_vector dist_v, bool *init_b,
3173 int *index_carry)
3174 {
3175 unsigned i;
3176 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3177
3178 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3179 {
3180 tree access_fn_a, access_fn_b;
3181 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3182
3183 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3184 {
3185 non_affine_dependence_relation (ddr);
3186 return false;
3187 }
3188
3189 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3190 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3191
3192 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3193 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3194 {
3195 int dist, index;
3196 int var_a = CHREC_VARIABLE (access_fn_a);
3197 int var_b = CHREC_VARIABLE (access_fn_b);
3198
3199 if (var_a != var_b
3200 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3201 {
3202 non_affine_dependence_relation (ddr);
3203 return false;
3204 }
3205
3206 dist = int_cst_value (SUB_DISTANCE (subscript));
3207 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3208 *index_carry = MIN (index, *index_carry);
3209
3210 /* This is the subscript coupling test. If we have already
3211 recorded a distance for this loop (a distance coming from
3212 another subscript), it should be the same. For example,
3213 in the following code, there is no dependence:
3214
3215 | loop i = 0, N, 1
3216 | T[i+1][i] = ...
3217 | ... = T[i][i]
3218 | endloop
3219 */
3220 if (init_v[index] != 0 && dist_v[index] != dist)
3221 {
3222 finalize_ddr_dependent (ddr, chrec_known);
3223 return false;
3224 }
3225
3226 dist_v[index] = dist;
3227 init_v[index] = 1;
3228 *init_b = true;
3229 }
3230 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3231 {
3232 /* This can be for example an affine vs. constant dependence
3233 (T[i] vs. T[3]) that is not an affine dependence and is
3234 not representable as a distance vector. */
3235 non_affine_dependence_relation (ddr);
3236 return false;
3237 }
3238 }
3239
3240 return true;
3241 }
3242
3243 /* Return true when the DDR contains only constant access functions. */
3244
3245 static bool
3246 constant_access_functions (const struct data_dependence_relation *ddr)
3247 {
3248 unsigned i;
3249
3250 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3251 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3252 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3253 return false;
3254
3255 return true;
3256 }
3257
3258 /* Helper function for the case where DDR_A and DDR_B are the same
3259 multivariate access function with a constant step. For an example
3260 see pr34635-1.c. */
3261
3262 static void
3263 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3264 {
3265 int x_1, x_2;
3266 tree c_1 = CHREC_LEFT (c_2);
3267 tree c_0 = CHREC_LEFT (c_1);
3268 lambda_vector dist_v;
3269 int v1, v2, cd;
3270
3271 /* Polynomials with more than 2 variables are not handled yet. When
3272 the evolution steps are parameters, it is not possible to
3273 represent the dependence using classical distance vectors. */
3274 if (TREE_CODE (c_0) != INTEGER_CST
3275 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3276 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3277 {
3278 DDR_AFFINE_P (ddr) = false;
3279 return;
3280 }
3281
3282 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3283 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3284
3285 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3286 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3287 v1 = int_cst_value (CHREC_RIGHT (c_1));
3288 v2 = int_cst_value (CHREC_RIGHT (c_2));
3289 cd = gcd (v1, v2);
3290 v1 /= cd;
3291 v2 /= cd;
3292
3293 if (v2 < 0)
3294 {
3295 v2 = -v2;
3296 v1 = -v1;
3297 }
3298
3299 dist_v[x_1] = v2;
3300 dist_v[x_2] = -v1;
3301 save_dist_v (ddr, dist_v);
3302
3303 add_outer_distances (ddr, dist_v, x_1);
3304 }
3305
3306 /* Helper function for the case where DDR_A and DDR_B are the same
3307 access functions. */
3308
3309 static void
3310 add_other_self_distances (struct data_dependence_relation *ddr)
3311 {
3312 lambda_vector dist_v;
3313 unsigned i;
3314 int index_carry = DDR_NB_LOOPS (ddr);
3315
3316 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3317 {
3318 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3319
3320 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3321 {
3322 if (!evolution_function_is_univariate_p (access_fun))
3323 {
3324 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3325 {
3326 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3327 return;
3328 }
3329
3330 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3331
3332 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3333 add_multivariate_self_dist (ddr, access_fun);
3334 else
3335 /* The evolution step is not constant: it varies in
3336 the outer loop, so this cannot be represented by a
3337 distance vector. For example in pr34635.c the
3338 evolution is {0, +, {0, +, 4}_1}_2. */
3339 DDR_AFFINE_P (ddr) = false;
3340
3341 return;
3342 }
3343
3344 index_carry = MIN (index_carry,
3345 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3346 DDR_LOOP_NEST (ddr)));
3347 }
3348 }
3349
3350 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3351 add_outer_distances (ddr, dist_v, index_carry);
3352 }
3353
3354 static void
3355 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3356 {
3357 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3358
3359 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3360 save_dist_v (ddr, dist_v);
3361 }
3362
3363 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3364 is the case for example when access functions are the same and
3365 equal to a constant, as in:
3366
3367 | loop_1
3368 | A[3] = ...
3369 | ... = A[3]
3370 | endloop_1
3371
3372 in which case the distance vectors are (0) and (1). */
3373
3374 static void
3375 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3376 {
3377 unsigned i, j;
3378
3379 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3380 {
3381 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3382 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3383 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3384
3385 for (j = 0; j < ca->n; j++)
3386 if (affine_function_zero_p (ca->fns[j]))
3387 {
3388 insert_innermost_unit_dist_vector (ddr);
3389 return;
3390 }
3391
3392 for (j = 0; j < cb->n; j++)
3393 if (affine_function_zero_p (cb->fns[j]))
3394 {
3395 insert_innermost_unit_dist_vector (ddr);
3396 return;
3397 }
3398 }
3399 }
3400
3401 /* Compute the classic per loop distance vector. DDR is the data
3402 dependence relation to build a vector from. Return false when fail
3403 to represent the data dependence as a distance vector. */
3404
3405 static bool
3406 build_classic_dist_vector (struct data_dependence_relation *ddr,
3407 struct loop *loop_nest)
3408 {
3409 bool init_b = false;
3410 int index_carry = DDR_NB_LOOPS (ddr);
3411 lambda_vector dist_v;
3412
3413 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3414 return false;
3415
3416 if (same_access_functions (ddr))
3417 {
3418 /* Save the 0 vector. */
3419 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3420 save_dist_v (ddr, dist_v);
3421
3422 if (constant_access_functions (ddr))
3423 add_distance_for_zero_overlaps (ddr);
3424
3425 if (DDR_NB_LOOPS (ddr) > 1)
3426 add_other_self_distances (ddr);
3427
3428 return true;
3429 }
3430
3431 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3432 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3433 dist_v, &init_b, &index_carry))
3434 return false;
3435
3436 /* Save the distance vector if we initialized one. */
3437 if (init_b)
3438 {
3439 /* Verify a basic constraint: classic distance vectors should
3440 always be lexicographically positive.
3441
3442 Data references are collected in the order of execution of
3443 the program, thus for the following loop
3444
3445 | for (i = 1; i < 100; i++)
3446 | for (j = 1; j < 100; j++)
3447 | {
3448 | t = T[j+1][i-1]; // A
3449 | T[j][i] = t + 2; // B
3450 | }
3451
3452 references are collected following the direction of the wind:
3453 A then B. The data dependence tests are performed also
3454 following this order, such that we're looking at the distance
3455 separating the elements accessed by A from the elements later
3456 accessed by B. But in this example, the distance returned by
3457 test_dep (A, B) is lexicographically negative (-1, 1), that
3458 means that the access A occurs later than B with respect to
3459 the outer loop, ie. we're actually looking upwind. In this
3460 case we solve test_dep (B, A) looking downwind to the
3461 lexicographically positive solution, that returns the
3462 distance vector (1, -1). */
3463 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3464 {
3465 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3466 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3467 loop_nest))
3468 return false;
3469 compute_subscript_distance (ddr);
3470 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3471 save_v, &init_b, &index_carry))
3472 return false;
3473 save_dist_v (ddr, save_v);
3474 DDR_REVERSED_P (ddr) = true;
3475
3476 /* In this case there is a dependence forward for all the
3477 outer loops:
3478
3479 | for (k = 1; k < 100; k++)
3480 | for (i = 1; i < 100; i++)
3481 | for (j = 1; j < 100; j++)
3482 | {
3483 | t = T[j+1][i-1]; // A
3484 | T[j][i] = t + 2; // B
3485 | }
3486
3487 the vectors are:
3488 (0, 1, -1)
3489 (1, 1, -1)
3490 (1, -1, 1)
3491 */
3492 if (DDR_NB_LOOPS (ddr) > 1)
3493 {
3494 add_outer_distances (ddr, save_v, index_carry);
3495 add_outer_distances (ddr, dist_v, index_carry);
3496 }
3497 }
3498 else
3499 {
3500 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3501 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3502
3503 if (DDR_NB_LOOPS (ddr) > 1)
3504 {
3505 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3506
3507 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3508 DDR_A (ddr), loop_nest))
3509 return false;
3510 compute_subscript_distance (ddr);
3511 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3512 opposite_v, &init_b,
3513 &index_carry))
3514 return false;
3515
3516 save_dist_v (ddr, save_v);
3517 add_outer_distances (ddr, dist_v, index_carry);
3518 add_outer_distances (ddr, opposite_v, index_carry);
3519 }
3520 else
3521 save_dist_v (ddr, save_v);
3522 }
3523 }
3524 else
3525 {
3526 /* There is a distance of 1 on all the outer loops: Example:
3527 there is a dependence of distance 1 on loop_1 for the array A.
3528
3529 | loop_1
3530 | A[5] = ...
3531 | endloop
3532 */
3533 add_outer_distances (ddr, dist_v,
3534 lambda_vector_first_nz (dist_v,
3535 DDR_NB_LOOPS (ddr), 0));
3536 }
3537
3538 if (dump_file && (dump_flags & TDF_DETAILS))
3539 {
3540 unsigned i;
3541
3542 fprintf (dump_file, "(build_classic_dist_vector\n");
3543 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3544 {
3545 fprintf (dump_file, " dist_vector = (");
3546 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3547 DDR_NB_LOOPS (ddr));
3548 fprintf (dump_file, " )\n");
3549 }
3550 fprintf (dump_file, ")\n");
3551 }
3552
3553 return true;
3554 }
3555
3556 /* Return the direction for a given distance.
3557 FIXME: Computing dir this way is suboptimal, since dir can catch
3558 cases that dist is unable to represent. */
3559
3560 static inline enum data_dependence_direction
3561 dir_from_dist (int dist)
3562 {
3563 if (dist > 0)
3564 return dir_positive;
3565 else if (dist < 0)
3566 return dir_negative;
3567 else
3568 return dir_equal;
3569 }
3570
3571 /* Compute the classic per loop direction vector. DDR is the data
3572 dependence relation to build a vector from. */
3573
3574 static void
3575 build_classic_dir_vector (struct data_dependence_relation *ddr)
3576 {
3577 unsigned i, j;
3578 lambda_vector dist_v;
3579
3580 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3581 {
3582 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3583
3584 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3585 dir_v[j] = dir_from_dist (dist_v[j]);
3586
3587 save_dir_v (ddr, dir_v);
3588 }
3589 }
3590
3591 /* Helper function. Returns true when there is a dependence between
3592 data references DRA and DRB. */
3593
3594 static bool
3595 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3596 struct data_reference *dra,
3597 struct data_reference *drb,
3598 struct loop *loop_nest)
3599 {
3600 unsigned int i;
3601 tree last_conflicts;
3602 struct subscript *subscript;
3603 tree res = NULL_TREE;
3604
3605 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
3606 {
3607 conflict_function *overlaps_a, *overlaps_b;
3608
3609 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3610 DR_ACCESS_FN (drb, i),
3611 &overlaps_a, &overlaps_b,
3612 &last_conflicts, loop_nest);
3613
3614 if (SUB_CONFLICTS_IN_A (subscript))
3615 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3616 if (SUB_CONFLICTS_IN_B (subscript))
3617 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3618
3619 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3620 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3621 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3622
3623 /* If there is any undetermined conflict function we have to
3624 give a conservative answer in case we cannot prove that
3625 no dependence exists when analyzing another subscript. */
3626 if (CF_NOT_KNOWN_P (overlaps_a)
3627 || CF_NOT_KNOWN_P (overlaps_b))
3628 {
3629 res = chrec_dont_know;
3630 continue;
3631 }
3632
3633 /* When there is a subscript with no dependence we can stop. */
3634 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3635 || CF_NO_DEPENDENCE_P (overlaps_b))
3636 {
3637 res = chrec_known;
3638 break;
3639 }
3640 }
3641
3642 if (res == NULL_TREE)
3643 return true;
3644
3645 if (res == chrec_known)
3646 dependence_stats.num_dependence_independent++;
3647 else
3648 dependence_stats.num_dependence_undetermined++;
3649 finalize_ddr_dependent (ddr, res);
3650 return false;
3651 }
3652
3653 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3654
3655 static void
3656 subscript_dependence_tester (struct data_dependence_relation *ddr,
3657 struct loop *loop_nest)
3658 {
3659 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3660 dependence_stats.num_dependence_dependent++;
3661
3662 compute_subscript_distance (ddr);
3663 if (build_classic_dist_vector (ddr, loop_nest))
3664 build_classic_dir_vector (ddr);
3665 }
3666
3667 /* Returns true when all the access functions of A are affine or
3668 constant with respect to LOOP_NEST. */
3669
3670 static bool
3671 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3672 const struct loop *loop_nest)
3673 {
3674 unsigned int i;
3675 vec<tree> fns = DR_ACCESS_FNS (a);
3676 tree t;
3677
3678 FOR_EACH_VEC_ELT (fns, i, t)
3679 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3680 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3681 return false;
3682
3683 return true;
3684 }
3685
3686 /* Initializes an equation for an OMEGA problem using the information
3687 contained in the ACCESS_FUN. Returns true when the operation
3688 succeeded.
3689
3690 PB is the omega constraint system.
3691 EQ is the number of the equation to be initialized.
3692 OFFSET is used for shifting the variables names in the constraints:
3693 a constrain is composed of 2 * the number of variables surrounding
3694 dependence accesses. OFFSET is set either to 0 for the first n variables,
3695 then it is set to n.
3696 ACCESS_FUN is expected to be an affine chrec. */
3697
3698 static bool
3699 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3700 unsigned int offset, tree access_fun,
3701 struct data_dependence_relation *ddr)
3702 {
3703 switch (TREE_CODE (access_fun))
3704 {
3705 case POLYNOMIAL_CHREC:
3706 {
3707 tree left = CHREC_LEFT (access_fun);
3708 tree right = CHREC_RIGHT (access_fun);
3709 int var = CHREC_VARIABLE (access_fun);
3710 unsigned var_idx;
3711
3712 if (TREE_CODE (right) != INTEGER_CST)
3713 return false;
3714
3715 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3716 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3717
3718 /* Compute the innermost loop index. */
3719 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3720
3721 if (offset == 0)
3722 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3723 += int_cst_value (right);
3724
3725 switch (TREE_CODE (left))
3726 {
3727 case POLYNOMIAL_CHREC:
3728 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3729
3730 case INTEGER_CST:
3731 pb->eqs[eq].coef[0] += int_cst_value (left);
3732 return true;
3733
3734 default:
3735 return false;
3736 }
3737 }
3738
3739 case INTEGER_CST:
3740 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3741 return true;
3742
3743 default:
3744 return false;
3745 }
3746 }
3747
3748 /* As explained in the comments preceding init_omega_for_ddr, we have
3749 to set up a system for each loop level, setting outer loops
3750 variation to zero, and current loop variation to positive or zero.
3751 Save each lexico positive distance vector. */
3752
3753 static void
3754 omega_extract_distance_vectors (omega_pb pb,
3755 struct data_dependence_relation *ddr)
3756 {
3757 int eq, geq;
3758 unsigned i, j;
3759 struct loop *loopi, *loopj;
3760 enum omega_result res;
3761
3762 /* Set a new problem for each loop in the nest. The basis is the
3763 problem that we have initialized until now. On top of this we
3764 add new constraints. */
3765 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3766 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3767 {
3768 int dist = 0;
3769 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3770 DDR_NB_LOOPS (ddr));
3771
3772 omega_copy_problem (copy, pb);
3773
3774 /* For all the outer loops "loop_j", add "dj = 0". */
3775 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3776 {
3777 eq = omega_add_zero_eq (copy, omega_black);
3778 copy->eqs[eq].coef[j + 1] = 1;
3779 }
3780
3781 /* For "loop_i", add "0 <= di". */
3782 geq = omega_add_zero_geq (copy, omega_black);
3783 copy->geqs[geq].coef[i + 1] = 1;
3784
3785 /* Reduce the constraint system, and test that the current
3786 problem is feasible. */
3787 res = omega_simplify_problem (copy);
3788 if (res == omega_false
3789 || res == omega_unknown
3790 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3791 goto next_problem;
3792
3793 for (eq = 0; eq < copy->num_subs; eq++)
3794 if (copy->subs[eq].key == (int) i + 1)
3795 {
3796 dist = copy->subs[eq].coef[0];
3797 goto found_dist;
3798 }
3799
3800 if (dist == 0)
3801 {
3802 /* Reinitialize problem... */
3803 omega_copy_problem (copy, pb);
3804 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3805 {
3806 eq = omega_add_zero_eq (copy, omega_black);
3807 copy->eqs[eq].coef[j + 1] = 1;
3808 }
3809
3810 /* ..., but this time "di = 1". */
3811 eq = omega_add_zero_eq (copy, omega_black);
3812 copy->eqs[eq].coef[i + 1] = 1;
3813 copy->eqs[eq].coef[0] = -1;
3814
3815 res = omega_simplify_problem (copy);
3816 if (res == omega_false
3817 || res == omega_unknown
3818 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3819 goto next_problem;
3820
3821 for (eq = 0; eq < copy->num_subs; eq++)
3822 if (copy->subs[eq].key == (int) i + 1)
3823 {
3824 dist = copy->subs[eq].coef[0];
3825 goto found_dist;
3826 }
3827 }
3828
3829 found_dist:;
3830 /* Save the lexicographically positive distance vector. */
3831 if (dist >= 0)
3832 {
3833 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3834 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3835
3836 dist_v[i] = dist;
3837
3838 for (eq = 0; eq < copy->num_subs; eq++)
3839 if (copy->subs[eq].key > 0)
3840 {
3841 dist = copy->subs[eq].coef[0];
3842 dist_v[copy->subs[eq].key - 1] = dist;
3843 }
3844
3845 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3846 dir_v[j] = dir_from_dist (dist_v[j]);
3847
3848 save_dist_v (ddr, dist_v);
3849 save_dir_v (ddr, dir_v);
3850 }
3851
3852 next_problem:;
3853 omega_free_problem (copy);
3854 }
3855 }
3856
3857 /* This is called for each subscript of a tuple of data references:
3858 insert an equality for representing the conflicts. */
3859
3860 static bool
3861 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3862 struct data_dependence_relation *ddr,
3863 omega_pb pb, bool *maybe_dependent)
3864 {
3865 int eq;
3866 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3867 TREE_TYPE (access_fun_b));
3868 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3869 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3870 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3871 tree minus_one;
3872
3873 /* When the fun_a - fun_b is not constant, the dependence is not
3874 captured by the classic distance vector representation. */
3875 if (TREE_CODE (difference) != INTEGER_CST)
3876 return false;
3877
3878 /* ZIV test. */
3879 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3880 {
3881 /* There is no dependence. */
3882 *maybe_dependent = false;
3883 return true;
3884 }
3885
3886 minus_one = build_int_cst (type, -1);
3887 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3888
3889 eq = omega_add_zero_eq (pb, omega_black);
3890 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3891 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3892 /* There is probably a dependence, but the system of
3893 constraints cannot be built: answer "don't know". */
3894 return false;
3895
3896 /* GCD test. */
3897 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3898 && !int_divides_p (lambda_vector_gcd
3899 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3900 2 * DDR_NB_LOOPS (ddr)),
3901 pb->eqs[eq].coef[0]))
3902 {
3903 /* There is no dependence. */
3904 *maybe_dependent = false;
3905 return true;
3906 }
3907
3908 return true;
3909 }
3910
3911 /* Helper function, same as init_omega_for_ddr but specialized for
3912 data references A and B. */
3913
3914 static bool
3915 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3916 struct data_dependence_relation *ddr,
3917 omega_pb pb, bool *maybe_dependent)
3918 {
3919 unsigned i;
3920 int ineq;
3921 struct loop *loopi;
3922 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3923
3924 /* Insert an equality per subscript. */
3925 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3926 {
3927 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3928 ddr, pb, maybe_dependent))
3929 return false;
3930 else if (*maybe_dependent == false)
3931 {
3932 /* There is no dependence. */
3933 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3934 return true;
3935 }
3936 }
3937
3938 /* Insert inequalities: constraints corresponding to the iteration
3939 domain, i.e. the loops surrounding the references "loop_x" and
3940 the distance variables "dx". The layout of the OMEGA
3941 representation is as follows:
3942 - coef[0] is the constant
3943 - coef[1..nb_loops] are the protected variables that will not be
3944 removed by the solver: the "dx"
3945 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3946 */
3947 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3948 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3949 {
3950 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3951
3952 /* 0 <= loop_x */
3953 ineq = omega_add_zero_geq (pb, omega_black);
3954 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3955
3956 /* 0 <= loop_x + dx */
3957 ineq = omega_add_zero_geq (pb, omega_black);
3958 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3959 pb->geqs[ineq].coef[i + 1] = 1;
3960
3961 if (nbi != -1)
3962 {
3963 /* loop_x <= nb_iters */
3964 ineq = omega_add_zero_geq (pb, omega_black);
3965 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3966 pb->geqs[ineq].coef[0] = nbi;
3967
3968 /* loop_x + dx <= nb_iters */
3969 ineq = omega_add_zero_geq (pb, omega_black);
3970 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3971 pb->geqs[ineq].coef[i + 1] = -1;
3972 pb->geqs[ineq].coef[0] = nbi;
3973
3974 /* A step "dx" bigger than nb_iters is not feasible, so
3975 add "0 <= nb_iters + dx", */
3976 ineq = omega_add_zero_geq (pb, omega_black);
3977 pb->geqs[ineq].coef[i + 1] = 1;
3978 pb->geqs[ineq].coef[0] = nbi;
3979 /* and "dx <= nb_iters". */
3980 ineq = omega_add_zero_geq (pb, omega_black);
3981 pb->geqs[ineq].coef[i + 1] = -1;
3982 pb->geqs[ineq].coef[0] = nbi;
3983 }
3984 }
3985
3986 omega_extract_distance_vectors (pb, ddr);
3987
3988 return true;
3989 }
3990
3991 /* Sets up the Omega dependence problem for the data dependence
3992 relation DDR. Returns false when the constraint system cannot be
3993 built, ie. when the test answers "don't know". Returns true
3994 otherwise, and when independence has been proved (using one of the
3995 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3996 set MAYBE_DEPENDENT to true.
3997
3998 Example: for setting up the dependence system corresponding to the
3999 conflicting accesses
4000
4001 | loop_i
4002 | loop_j
4003 | A[i, i+1] = ...
4004 | ... A[2*j, 2*(i + j)]
4005 | endloop_j
4006 | endloop_i
4007
4008 the following constraints come from the iteration domain:
4009
4010 0 <= i <= Ni
4011 0 <= i + di <= Ni
4012 0 <= j <= Nj
4013 0 <= j + dj <= Nj
4014
4015 where di, dj are the distance variables. The constraints
4016 representing the conflicting elements are:
4017
4018 i = 2 * (j + dj)
4019 i + 1 = 2 * (i + di + j + dj)
4020
4021 For asking that the resulting distance vector (di, dj) be
4022 lexicographically positive, we insert the constraint "di >= 0". If
4023 "di = 0" in the solution, we fix that component to zero, and we
4024 look at the inner loops: we set a new problem where all the outer
4025 loop distances are zero, and fix this inner component to be
4026 positive. When one of the components is positive, we save that
4027 distance, and set a new problem where the distance on this loop is
4028 zero, searching for other distances in the inner loops. Here is
4029 the classic example that illustrates that we have to set for each
4030 inner loop a new problem:
4031
4032 | loop_1
4033 | loop_2
4034 | A[10]
4035 | endloop_2
4036 | endloop_1
4037
4038 we have to save two distances (1, 0) and (0, 1).
4039
4040 Given two array references, refA and refB, we have to set the
4041 dependence problem twice, refA vs. refB and refB vs. refA, and we
4042 cannot do a single test, as refB might occur before refA in the
4043 inner loops, and the contrary when considering outer loops: ex.
4044
4045 | loop_0
4046 | loop_1
4047 | loop_2
4048 | T[{1,+,1}_2][{1,+,1}_1] // refA
4049 | T[{2,+,1}_2][{0,+,1}_1] // refB
4050 | endloop_2
4051 | endloop_1
4052 | endloop_0
4053
4054 refB touches the elements in T before refA, and thus for the same
4055 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
4056 but for successive loop_0 iterations, we have (1, -1, 1)
4057
4058 The Omega solver expects the distance variables ("di" in the
4059 previous example) to come first in the constraint system (as
4060 variables to be protected, or "safe" variables), the constraint
4061 system is built using the following layout:
4062
4063 "cst | distance vars | index vars".
4064 */
4065
4066 static bool
4067 init_omega_for_ddr (struct data_dependence_relation *ddr,
4068 bool *maybe_dependent)
4069 {
4070 omega_pb pb;
4071 bool res = false;
4072
4073 *maybe_dependent = true;
4074
4075 if (same_access_functions (ddr))
4076 {
4077 unsigned j;
4078 lambda_vector dir_v;
4079
4080 /* Save the 0 vector. */
4081 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4082 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4083 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4084 dir_v[j] = dir_equal;
4085 save_dir_v (ddr, dir_v);
4086
4087 /* Save the dependences carried by outer loops. */
4088 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4089 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4090 maybe_dependent);
4091 omega_free_problem (pb);
4092 return res;
4093 }
4094
4095 /* Omega expects the protected variables (those that have to be kept
4096 after elimination) to appear first in the constraint system.
4097 These variables are the distance variables. In the following
4098 initialization we declare NB_LOOPS safe variables, and the total
4099 number of variables for the constraint system is 2*NB_LOOPS. */
4100 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4101 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4102 maybe_dependent);
4103 omega_free_problem (pb);
4104
4105 /* Stop computation if not decidable, or no dependence. */
4106 if (res == false || *maybe_dependent == false)
4107 return res;
4108
4109 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4110 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4111 maybe_dependent);
4112 omega_free_problem (pb);
4113
4114 return res;
4115 }
4116
4117 /* Return true when DDR contains the same information as that stored
4118 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4119
4120 static bool
4121 ddr_consistent_p (FILE *file,
4122 struct data_dependence_relation *ddr,
4123 vec<lambda_vector> dist_vects,
4124 vec<lambda_vector> dir_vects)
4125 {
4126 unsigned int i, j;
4127
4128 /* If dump_file is set, output there. */
4129 if (dump_file && (dump_flags & TDF_DETAILS))
4130 file = dump_file;
4131
4132 if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr))
4133 {
4134 lambda_vector b_dist_v;
4135 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4136 dist_vects.length (),
4137 DDR_NUM_DIST_VECTS (ddr));
4138
4139 fprintf (file, "Banerjee dist vectors:\n");
4140 FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v)
4141 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4142
4143 fprintf (file, "Omega dist vectors:\n");
4144 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4145 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4146
4147 fprintf (file, "data dependence relation:\n");
4148 dump_data_dependence_relation (file, ddr);
4149
4150 fprintf (file, ")\n");
4151 return false;
4152 }
4153
4154 if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr))
4155 {
4156 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4157 dir_vects.length (),
4158 DDR_NUM_DIR_VECTS (ddr));
4159 return false;
4160 }
4161
4162 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4163 {
4164 lambda_vector a_dist_v;
4165 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4166
4167 /* Distance vectors are not ordered in the same way in the DDR
4168 and in the DIST_VECTS: search for a matching vector. */
4169 FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v)
4170 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4171 break;
4172
4173 if (j == dist_vects.length ())
4174 {
4175 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4176 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4177 fprintf (file, "not found in Omega dist vectors:\n");
4178 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4179 fprintf (file, "data dependence relation:\n");
4180 dump_data_dependence_relation (file, ddr);
4181 fprintf (file, ")\n");
4182 }
4183 }
4184
4185 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4186 {
4187 lambda_vector a_dir_v;
4188 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4189
4190 /* Direction vectors are not ordered in the same way in the DDR
4191 and in the DIR_VECTS: search for a matching vector. */
4192 FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v)
4193 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4194 break;
4195
4196 if (j == dist_vects.length ())
4197 {
4198 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4199 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4200 fprintf (file, "not found in Omega dir vectors:\n");
4201 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4202 fprintf (file, "data dependence relation:\n");
4203 dump_data_dependence_relation (file, ddr);
4204 fprintf (file, ")\n");
4205 }
4206 }
4207
4208 return true;
4209 }
4210
4211 /* This computes the affine dependence relation between A and B with
4212 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4213 independence between two accesses, while CHREC_DONT_KNOW is used
4214 for representing the unknown relation.
4215
4216 Note that it is possible to stop the computation of the dependence
4217 relation the first time we detect a CHREC_KNOWN element for a given
4218 subscript. */
4219
4220 void
4221 compute_affine_dependence (struct data_dependence_relation *ddr,
4222 struct loop *loop_nest)
4223 {
4224 struct data_reference *dra = DDR_A (ddr);
4225 struct data_reference *drb = DDR_B (ddr);
4226
4227 if (dump_file && (dump_flags & TDF_DETAILS))
4228 {
4229 fprintf (dump_file, "(compute_affine_dependence\n");
4230 fprintf (dump_file, " stmt_a: ");
4231 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4232 fprintf (dump_file, " stmt_b: ");
4233 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4234 }
4235
4236 /* Analyze only when the dependence relation is not yet known. */
4237 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4238 {
4239 dependence_stats.num_dependence_tests++;
4240
4241 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4242 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4243 {
4244 subscript_dependence_tester (ddr, loop_nest);
4245
4246 if (flag_check_data_deps)
4247 {
4248 /* Dump the dependences from the first algorithm. */
4249 if (dump_file && (dump_flags & TDF_DETAILS))
4250 {
4251 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4252 dump_data_dependence_relation (dump_file, ddr);
4253 }
4254
4255 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4256 {
4257 bool maybe_dependent;
4258 vec<lambda_vector> dir_vects, dist_vects;
4259
4260 /* Save the result of the first DD analyzer. */
4261 dist_vects = DDR_DIST_VECTS (ddr);
4262 dir_vects = DDR_DIR_VECTS (ddr);
4263
4264 /* Reset the information. */
4265 DDR_DIST_VECTS (ddr).create (0);
4266 DDR_DIR_VECTS (ddr).create (0);
4267
4268 /* Compute the same information using Omega. */
4269 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4270 goto csys_dont_know;
4271
4272 if (dump_file && (dump_flags & TDF_DETAILS))
4273 {
4274 fprintf (dump_file, "Omega Analyzer\n");
4275 dump_data_dependence_relation (dump_file, ddr);
4276 }
4277
4278 /* Check that we get the same information. */
4279 if (maybe_dependent)
4280 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4281 dir_vects));
4282 }
4283 }
4284 }
4285
4286 /* As a last case, if the dependence cannot be determined, or if
4287 the dependence is considered too difficult to determine, answer
4288 "don't know". */
4289 else
4290 {
4291 csys_dont_know:;
4292 dependence_stats.num_dependence_undetermined++;
4293
4294 if (dump_file && (dump_flags & TDF_DETAILS))
4295 {
4296 fprintf (dump_file, "Data ref a:\n");
4297 dump_data_reference (dump_file, dra);
4298 fprintf (dump_file, "Data ref b:\n");
4299 dump_data_reference (dump_file, drb);
4300 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4301 }
4302 finalize_ddr_dependent (ddr, chrec_dont_know);
4303 }
4304 }
4305
4306 if (dump_file && (dump_flags & TDF_DETAILS))
4307 {
4308 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4309 fprintf (dump_file, ") -> no dependence\n");
4310 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4311 fprintf (dump_file, ") -> dependence analysis failed\n");
4312 else
4313 fprintf (dump_file, ")\n");
4314 }
4315 }
4316
4317 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4318 the data references in DATAREFS, in the LOOP_NEST. When
4319 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4320 relations. Return true when successful, i.e. data references number
4321 is small enough to be handled. */
4322
4323 bool
4324 compute_all_dependences (vec<data_reference_p> datarefs,
4325 vec<ddr_p> *dependence_relations,
4326 vec<loop_p> loop_nest,
4327 bool compute_self_and_rr)
4328 {
4329 struct data_dependence_relation *ddr;
4330 struct data_reference *a, *b;
4331 unsigned int i, j;
4332
4333 if ((int) datarefs.length ()
4334 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4335 {
4336 struct data_dependence_relation *ddr;
4337
4338 /* Insert a single relation into dependence_relations:
4339 chrec_dont_know. */
4340 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4341 dependence_relations->safe_push (ddr);
4342 return false;
4343 }
4344
4345 FOR_EACH_VEC_ELT (datarefs, i, a)
4346 for (j = i + 1; datarefs.iterate (j, &b); j++)
4347 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4348 {
4349 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4350 dependence_relations->safe_push (ddr);
4351 if (loop_nest.exists ())
4352 compute_affine_dependence (ddr, loop_nest[0]);
4353 }
4354
4355 if (compute_self_and_rr)
4356 FOR_EACH_VEC_ELT (datarefs, i, a)
4357 {
4358 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4359 dependence_relations->safe_push (ddr);
4360 if (loop_nest.exists ())
4361 compute_affine_dependence (ddr, loop_nest[0]);
4362 }
4363
4364 return true;
4365 }
4366
4367 /* Describes a location of a memory reference. */
4368
4369 typedef struct data_ref_loc_d
4370 {
4371 /* The memory reference. */
4372 tree ref;
4373
4374 /* True if the memory reference is read. */
4375 bool is_read;
4376 } data_ref_loc;
4377
4378
4379 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4380 true if STMT clobbers memory, false otherwise. */
4381
4382 static bool
4383 get_references_in_stmt (gimple stmt, vec<data_ref_loc, va_heap> *references)
4384 {
4385 bool clobbers_memory = false;
4386 data_ref_loc ref;
4387 tree op0, op1;
4388 enum gimple_code stmt_code = gimple_code (stmt);
4389
4390 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4391 As we cannot model data-references to not spelled out
4392 accesses give up if they may occur. */
4393 if (stmt_code == GIMPLE_CALL
4394 && !(gimple_call_flags (stmt) & ECF_CONST))
4395 {
4396 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4397 if (gimple_call_internal_p (stmt))
4398 switch (gimple_call_internal_fn (stmt))
4399 {
4400 case IFN_GOMP_SIMD_LANE:
4401 {
4402 struct loop *loop = gimple_bb (stmt)->loop_father;
4403 tree uid = gimple_call_arg (stmt, 0);
4404 gcc_assert (TREE_CODE (uid) == SSA_NAME);
4405 if (loop == NULL
4406 || loop->simduid != SSA_NAME_VAR (uid))
4407 clobbers_memory = true;
4408 break;
4409 }
4410 case IFN_MASK_LOAD:
4411 case IFN_MASK_STORE:
4412 break;
4413 default:
4414 clobbers_memory = true;
4415 break;
4416 }
4417 else
4418 clobbers_memory = true;
4419 }
4420 else if (stmt_code == GIMPLE_ASM
4421 && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4422 || gimple_vuse (stmt)))
4423 clobbers_memory = true;
4424
4425 if (!gimple_vuse (stmt))
4426 return clobbers_memory;
4427
4428 if (stmt_code == GIMPLE_ASSIGN)
4429 {
4430 tree base;
4431 op0 = gimple_assign_lhs (stmt);
4432 op1 = gimple_assign_rhs1 (stmt);
4433
4434 if (DECL_P (op1)
4435 || (REFERENCE_CLASS_P (op1)
4436 && (base = get_base_address (op1))
4437 && TREE_CODE (base) != SSA_NAME))
4438 {
4439 ref.ref = op1;
4440 ref.is_read = true;
4441 references->safe_push (ref);
4442 }
4443 }
4444 else if (stmt_code == GIMPLE_CALL)
4445 {
4446 unsigned i, n;
4447
4448 ref.is_read = false;
4449 if (gimple_call_internal_p (stmt))
4450 switch (gimple_call_internal_fn (stmt))
4451 {
4452 case IFN_MASK_LOAD:
4453 if (gimple_call_lhs (stmt) == NULL_TREE)
4454 break;
4455 ref.is_read = true;
4456 case IFN_MASK_STORE:
4457 ref.ref = fold_build2 (MEM_REF,
4458 ref.is_read
4459 ? TREE_TYPE (gimple_call_lhs (stmt))
4460 : TREE_TYPE (gimple_call_arg (stmt, 3)),
4461 gimple_call_arg (stmt, 0),
4462 gimple_call_arg (stmt, 1));
4463 references->safe_push (ref);
4464 return false;
4465 default:
4466 break;
4467 }
4468
4469 op0 = gimple_call_lhs (stmt);
4470 n = gimple_call_num_args (stmt);
4471 for (i = 0; i < n; i++)
4472 {
4473 op1 = gimple_call_arg (stmt, i);
4474
4475 if (DECL_P (op1)
4476 || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
4477 {
4478 ref.ref = op1;
4479 ref.is_read = true;
4480 references->safe_push (ref);
4481 }
4482 }
4483 }
4484 else
4485 return clobbers_memory;
4486
4487 if (op0
4488 && (DECL_P (op0)
4489 || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
4490 {
4491 ref.ref = op0;
4492 ref.is_read = false;
4493 references->safe_push (ref);
4494 }
4495 return clobbers_memory;
4496 }
4497
4498 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4499 reference, returns false, otherwise returns true. NEST is the outermost
4500 loop of the loop nest in which the references should be analyzed. */
4501
4502 bool
4503 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4504 vec<data_reference_p> *datarefs)
4505 {
4506 unsigned i;
4507 auto_vec<data_ref_loc, 2> references;
4508 data_ref_loc *ref;
4509 bool ret = true;
4510 data_reference_p dr;
4511
4512 if (get_references_in_stmt (stmt, &references))
4513 return false;
4514
4515 FOR_EACH_VEC_ELT (references, i, ref)
4516 {
4517 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4518 ref->ref, stmt, ref->is_read);
4519 gcc_assert (dr != NULL);
4520 datarefs->safe_push (dr);
4521 }
4522 references.release ();
4523 return ret;
4524 }
4525
4526 /* Stores the data references in STMT to DATAREFS. If there is an
4527 unanalyzable reference, returns false, otherwise returns true.
4528 NEST is the outermost loop of the loop nest in which the references
4529 should be instantiated, LOOP is the loop in which the references
4530 should be analyzed. */
4531
4532 bool
4533 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4534 vec<data_reference_p> *datarefs)
4535 {
4536 unsigned i;
4537 auto_vec<data_ref_loc, 2> references;
4538 data_ref_loc *ref;
4539 bool ret = true;
4540 data_reference_p dr;
4541
4542 if (get_references_in_stmt (stmt, &references))
4543 return false;
4544
4545 FOR_EACH_VEC_ELT (references, i, ref)
4546 {
4547 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read);
4548 gcc_assert (dr != NULL);
4549 datarefs->safe_push (dr);
4550 }
4551
4552 references.release ();
4553 return ret;
4554 }
4555
4556 /* Search the data references in LOOP, and record the information into
4557 DATAREFS. Returns chrec_dont_know when failing to analyze a
4558 difficult case, returns NULL_TREE otherwise. */
4559
4560 tree
4561 find_data_references_in_bb (struct loop *loop, basic_block bb,
4562 vec<data_reference_p> *datarefs)
4563 {
4564 gimple_stmt_iterator bsi;
4565
4566 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4567 {
4568 gimple stmt = gsi_stmt (bsi);
4569
4570 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4571 {
4572 struct data_reference *res;
4573 res = XCNEW (struct data_reference);
4574 datarefs->safe_push (res);
4575
4576 return chrec_dont_know;
4577 }
4578 }
4579
4580 return NULL_TREE;
4581 }
4582
4583 /* Search the data references in LOOP, and record the information into
4584 DATAREFS. Returns chrec_dont_know when failing to analyze a
4585 difficult case, returns NULL_TREE otherwise.
4586
4587 TODO: This function should be made smarter so that it can handle address
4588 arithmetic as if they were array accesses, etc. */
4589
4590 tree
4591 find_data_references_in_loop (struct loop *loop,
4592 vec<data_reference_p> *datarefs)
4593 {
4594 basic_block bb, *bbs;
4595 unsigned int i;
4596
4597 bbs = get_loop_body_in_dom_order (loop);
4598
4599 for (i = 0; i < loop->num_nodes; i++)
4600 {
4601 bb = bbs[i];
4602
4603 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4604 {
4605 free (bbs);
4606 return chrec_dont_know;
4607 }
4608 }
4609 free (bbs);
4610
4611 return NULL_TREE;
4612 }
4613
4614 /* Recursive helper function. */
4615
4616 static bool
4617 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4618 {
4619 /* Inner loops of the nest should not contain siblings. Example:
4620 when there are two consecutive loops,
4621
4622 | loop_0
4623 | loop_1
4624 | A[{0, +, 1}_1]
4625 | endloop_1
4626 | loop_2
4627 | A[{0, +, 1}_2]
4628 | endloop_2
4629 | endloop_0
4630
4631 the dependence relation cannot be captured by the distance
4632 abstraction. */
4633 if (loop->next)
4634 return false;
4635
4636 loop_nest->safe_push (loop);
4637 if (loop->inner)
4638 return find_loop_nest_1 (loop->inner, loop_nest);
4639 return true;
4640 }
4641
4642 /* Return false when the LOOP is not well nested. Otherwise return
4643 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4644 contain the loops from the outermost to the innermost, as they will
4645 appear in the classic distance vector. */
4646
4647 bool
4648 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4649 {
4650 loop_nest->safe_push (loop);
4651 if (loop->inner)
4652 return find_loop_nest_1 (loop->inner, loop_nest);
4653 return true;
4654 }
4655
4656 /* Returns true when the data dependences have been computed, false otherwise.
4657 Given a loop nest LOOP, the following vectors are returned:
4658 DATAREFS is initialized to all the array elements contained in this loop,
4659 DEPENDENCE_RELATIONS contains the relations between the data references.
4660 Compute read-read and self relations if
4661 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4662
4663 bool
4664 compute_data_dependences_for_loop (struct loop *loop,
4665 bool compute_self_and_read_read_dependences,
4666 vec<loop_p> *loop_nest,
4667 vec<data_reference_p> *datarefs,
4668 vec<ddr_p> *dependence_relations)
4669 {
4670 bool res = true;
4671
4672 memset (&dependence_stats, 0, sizeof (dependence_stats));
4673
4674 /* If the loop nest is not well formed, or one of the data references
4675 is not computable, give up without spending time to compute other
4676 dependences. */
4677 if (!loop
4678 || !find_loop_nest (loop, loop_nest)
4679 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4680 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4681 compute_self_and_read_read_dependences))
4682 res = false;
4683
4684 if (dump_file && (dump_flags & TDF_STATS))
4685 {
4686 fprintf (dump_file, "Dependence tester statistics:\n");
4687
4688 fprintf (dump_file, "Number of dependence tests: %d\n",
4689 dependence_stats.num_dependence_tests);
4690 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4691 dependence_stats.num_dependence_dependent);
4692 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4693 dependence_stats.num_dependence_independent);
4694 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4695 dependence_stats.num_dependence_undetermined);
4696
4697 fprintf (dump_file, "Number of subscript tests: %d\n",
4698 dependence_stats.num_subscript_tests);
4699 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4700 dependence_stats.num_subscript_undetermined);
4701 fprintf (dump_file, "Number of same subscript function: %d\n",
4702 dependence_stats.num_same_subscript_function);
4703
4704 fprintf (dump_file, "Number of ziv tests: %d\n",
4705 dependence_stats.num_ziv);
4706 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4707 dependence_stats.num_ziv_dependent);
4708 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4709 dependence_stats.num_ziv_independent);
4710 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4711 dependence_stats.num_ziv_unimplemented);
4712
4713 fprintf (dump_file, "Number of siv tests: %d\n",
4714 dependence_stats.num_siv);
4715 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4716 dependence_stats.num_siv_dependent);
4717 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4718 dependence_stats.num_siv_independent);
4719 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4720 dependence_stats.num_siv_unimplemented);
4721
4722 fprintf (dump_file, "Number of miv tests: %d\n",
4723 dependence_stats.num_miv);
4724 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4725 dependence_stats.num_miv_dependent);
4726 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4727 dependence_stats.num_miv_independent);
4728 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4729 dependence_stats.num_miv_unimplemented);
4730 }
4731
4732 return res;
4733 }
4734
4735 /* Returns true when the data dependences for the basic block BB have been
4736 computed, false otherwise.
4737 DATAREFS is initialized to all the array elements contained in this basic
4738 block, DEPENDENCE_RELATIONS contains the relations between the data
4739 references. Compute read-read and self relations if
4740 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4741 bool
4742 compute_data_dependences_for_bb (basic_block bb,
4743 bool compute_self_and_read_read_dependences,
4744 vec<data_reference_p> *datarefs,
4745 vec<ddr_p> *dependence_relations)
4746 {
4747 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4748 return false;
4749
4750 return compute_all_dependences (*datarefs, dependence_relations, vNULL,
4751 compute_self_and_read_read_dependences);
4752 }
4753
4754 /* Entry point (for testing only). Analyze all the data references
4755 and the dependence relations in LOOP.
4756
4757 The data references are computed first.
4758
4759 A relation on these nodes is represented by a complete graph. Some
4760 of the relations could be of no interest, thus the relations can be
4761 computed on demand.
4762
4763 In the following function we compute all the relations. This is
4764 just a first implementation that is here for:
4765 - for showing how to ask for the dependence relations,
4766 - for the debugging the whole dependence graph,
4767 - for the dejagnu testcases and maintenance.
4768
4769 It is possible to ask only for a part of the graph, avoiding to
4770 compute the whole dependence graph. The computed dependences are
4771 stored in a knowledge base (KB) such that later queries don't
4772 recompute the same information. The implementation of this KB is
4773 transparent to the optimizer, and thus the KB can be changed with a
4774 more efficient implementation, or the KB could be disabled. */
4775 static void
4776 analyze_all_data_dependences (struct loop *loop)
4777 {
4778 unsigned int i;
4779 int nb_data_refs = 10;
4780 vec<data_reference_p> datarefs;
4781 datarefs.create (nb_data_refs);
4782 vec<ddr_p> dependence_relations;
4783 dependence_relations.create (nb_data_refs * nb_data_refs);
4784 vec<loop_p> loop_nest;
4785 loop_nest.create (3);
4786
4787 /* Compute DDs on the whole function. */
4788 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4789 &dependence_relations);
4790
4791 if (dump_file)
4792 {
4793 dump_data_dependence_relations (dump_file, dependence_relations);
4794 fprintf (dump_file, "\n\n");
4795
4796 if (dump_flags & TDF_DETAILS)
4797 dump_dist_dir_vectors (dump_file, dependence_relations);
4798
4799 if (dump_flags & TDF_STATS)
4800 {
4801 unsigned nb_top_relations = 0;
4802 unsigned nb_bot_relations = 0;
4803 unsigned nb_chrec_relations = 0;
4804 struct data_dependence_relation *ddr;
4805
4806 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4807 {
4808 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4809 nb_top_relations++;
4810
4811 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4812 nb_bot_relations++;
4813
4814 else
4815 nb_chrec_relations++;
4816 }
4817
4818 gather_stats_on_scev_database ();
4819 }
4820 }
4821
4822 loop_nest.release ();
4823 free_dependence_relations (dependence_relations);
4824 free_data_refs (datarefs);
4825 }
4826
4827 /* Computes all the data dependences and check that the results of
4828 several analyzers are the same. */
4829
4830 void
4831 tree_check_data_deps (void)
4832 {
4833 struct loop *loop_nest;
4834
4835 FOR_EACH_LOOP (loop_nest, 0)
4836 analyze_all_data_dependences (loop_nest);
4837 }
4838
4839 /* Free the memory used by a data dependence relation DDR. */
4840
4841 void
4842 free_dependence_relation (struct data_dependence_relation *ddr)
4843 {
4844 if (ddr == NULL)
4845 return;
4846
4847 if (DDR_SUBSCRIPTS (ddr).exists ())
4848 free_subscripts (DDR_SUBSCRIPTS (ddr));
4849 DDR_DIST_VECTS (ddr).release ();
4850 DDR_DIR_VECTS (ddr).release ();
4851
4852 free (ddr);
4853 }
4854
4855 /* Free the memory used by the data dependence relations from
4856 DEPENDENCE_RELATIONS. */
4857
4858 void
4859 free_dependence_relations (vec<ddr_p> dependence_relations)
4860 {
4861 unsigned int i;
4862 struct data_dependence_relation *ddr;
4863
4864 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4865 if (ddr)
4866 free_dependence_relation (ddr);
4867
4868 dependence_relations.release ();
4869 }
4870
4871 /* Free the memory used by the data references from DATAREFS. */
4872
4873 void
4874 free_data_refs (vec<data_reference_p> datarefs)
4875 {
4876 unsigned int i;
4877 struct data_reference *dr;
4878
4879 FOR_EACH_VEC_ELT (datarefs, i, dr)
4880 free_data_ref (dr);
4881 datarefs.release ();
4882 }