gcc/tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
24
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to the same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
30
31 The goals of this analysis are:
32
33 - to determine independence: the relation between two
34 independent accesses is qualified with chrec_known (this
35 information allows loop parallelization),
36
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
39
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
45
46 - to define a knowledge base for storing the data dependence
47 information,
48
49 - to define an interface to access this data.
50
51
52 Definitions:
53
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
58
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
63
64 References:
65
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
69
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
72
73
74 */
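
/* A small worked example of the test above (an illustrative sketch;
   the loop, the array and the bound N are hypothetical):

   | for (x = 0; x < N; x++)
   |   A[x + 1] = A[x] + 1;

   The access functions on array A are chrec1 = {1, +, 1}_1 for the
   write A[x + 1] and chrec2 = {0, +, 1}_1 for the read A[x].  Solving
   | chrec1 (x) == chrec2 (y), i.e. x + 1 == y,
   shows that iteration y reads the element written at iteration
   x = y - 1, so the two references conflict with distance vector (1)
   and direction vector (+) in loop 1.  */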
75
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "expmed.h"
84 #include "insn-config.h"
85 #include "emit-rtl.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "flags.h"
90 #include "dojump.h"
91 #include "explow.h"
92 #include "calls.h"
93 #include "varasm.h"
94 #include "stmt.h"
95 #include "expr.h"
96 #include "internal-fn.h"
97 #include "gimple-iterator.h"
98 #include "tree-ssa-loop-niter.h"
99 #include "tree-ssa-loop.h"
100 #include "tree-ssa.h"
101 #include "cfgloop.h"
102 #include "tree-data-ref.h"
103 #include "tree-scalar-evolution.h"
104 #include "dumpfile.h"
105 #include "langhooks.h"
106 #include "tree-affine.h"
107 #include "params.h"
108
109 static struct datadep_stats
110 {
111 int num_dependence_tests;
112 int num_dependence_dependent;
113 int num_dependence_independent;
114 int num_dependence_undetermined;
115
116 int num_subscript_tests;
117 int num_subscript_undetermined;
118 int num_same_subscript_function;
119
120 int num_ziv;
121 int num_ziv_independent;
122 int num_ziv_dependent;
123 int num_ziv_unimplemented;
124
125 int num_siv;
126 int num_siv_independent;
127 int num_siv_dependent;
128 int num_siv_unimplemented;
129
130 int num_miv;
131 int num_miv_independent;
132 int num_miv_dependent;
133 int num_miv_unimplemented;
134 } dependence_stats;
135
136 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
137 struct data_reference *,
138 struct data_reference *,
139 struct loop *);
140 /* Returns true iff A divides B. */
141
142 static inline bool
143 tree_fold_divides_p (const_tree a, const_tree b)
144 {
145 gcc_assert (TREE_CODE (a) == INTEGER_CST);
146 gcc_assert (TREE_CODE (b) == INTEGER_CST);
147 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
148 }
149
150 /* Returns true iff A divides B. */
151
152 static inline bool
153 int_divides_p (int a, int b)
154 {
155 return ((b % a) == 0);
156 }
157
158 \f
159
160 /* Dump into FILE all the data references from DATAREFS. */
161
162 static void
163 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
164 {
165 unsigned int i;
166 struct data_reference *dr;
167
168 FOR_EACH_VEC_ELT (datarefs, i, dr)
169 dump_data_reference (file, dr);
170 }
171
172 /* Unified dump function for a vector of data references; prints to stderr. */
173
174 DEBUG_FUNCTION void
175 debug (vec<data_reference_p> &ref)
176 {
177 dump_data_references (stderr, ref);
178 }
179
180 DEBUG_FUNCTION void
181 debug (vec<data_reference_p> *ptr)
182 {
183 if (ptr)
184 debug (*ptr);
185 else
186 fprintf (stderr, "<nil>\n");
187 }
188
189
190 /* Dump into STDERR all the data references from DATAREFS. */
191
192 DEBUG_FUNCTION void
193 debug_data_references (vec<data_reference_p> datarefs)
194 {
195 dump_data_references (stderr, datarefs);
196 }
197
198 /* Print to STDERR the data_reference DR. */
199
200 DEBUG_FUNCTION void
201 debug_data_reference (struct data_reference *dr)
202 {
203 dump_data_reference (stderr, dr);
204 }
205
206 /* Dump function for a DATA_REFERENCE structure. */
207
208 void
209 dump_data_reference (FILE *outf,
210 struct data_reference *dr)
211 {
212 unsigned int i;
213
214 fprintf (outf, "#(Data Ref: \n");
215 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
216 fprintf (outf, "# stmt: ");
217 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
218 fprintf (outf, "# ref: ");
219 print_generic_stmt (outf, DR_REF (dr), 0);
220 fprintf (outf, "# base_object: ");
221 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
222
223 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
224 {
225 fprintf (outf, "# Access function %d: ", i);
226 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
227 }
228 fprintf (outf, "#)\n");
229 }
230
231 /* Unified dump function for a DATA_REFERENCE structure. */
232
233 DEBUG_FUNCTION void
234 debug (data_reference &ref)
235 {
236 dump_data_reference (stderr, &ref);
237 }
238
239 DEBUG_FUNCTION void
240 debug (data_reference *ptr)
241 {
242 if (ptr)
243 debug (*ptr);
244 else
245 fprintf (stderr, "<nil>\n");
246 }
247
248
249 /* Dumps the affine function described by FN to the file OUTF. */
250
251 DEBUG_FUNCTION void
252 dump_affine_function (FILE *outf, affine_fn fn)
253 {
254 unsigned i;
255 tree coef;
256
257 print_generic_expr (outf, fn[0], TDF_SLIM);
258 for (i = 1; fn.iterate (i, &coef); i++)
259 {
260 fprintf (outf, " + ");
261 print_generic_expr (outf, coef, TDF_SLIM);
262 fprintf (outf, " * x_%u", i);
263 }
264 }
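
/* Note on the representation (a short sketch using helpers defined
   later in this file; the constants are arbitrary): an affine_fn is a
   vector of trees in which element 0 is the constant term and element
   I (I >= 1) is the coefficient of the induction variable x_I.  For
   instance

   | affine_fn fn = affine_fn_univar (size_int (3), 2, size_int (5));
   | dump_affine_function (stderr, fn);
   | affine_fn_free (fn);

   prints "3 + 0 * x_1 + 5 * x_2", i.e. FN denotes 3 + 5 * x_2.  */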
265
266 /* Dumps the conflict function CF to the file OUTF. */
267
268 DEBUG_FUNCTION void
269 dump_conflict_function (FILE *outf, conflict_function *cf)
270 {
271 unsigned i;
272
273 if (cf->n == NO_DEPENDENCE)
274 fprintf (outf, "no dependence");
275 else if (cf->n == NOT_KNOWN)
276 fprintf (outf, "not known");
277 else
278 {
279 for (i = 0; i < cf->n; i++)
280 {
281 if (i != 0)
282 fprintf (outf, " ");
283 fprintf (outf, "[");
284 dump_affine_function (outf, cf->fns[i]);
285 fprintf (outf, "]");
286 }
287 }
288 }
289
290 /* Dump function for a SUBSCRIPT structure. */
291
292 DEBUG_FUNCTION void
293 dump_subscript (FILE *outf, struct subscript *subscript)
294 {
295 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
296
297 fprintf (outf, "\n (subscript \n");
298 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
299 dump_conflict_function (outf, cf);
300 if (CF_NONTRIVIAL_P (cf))
301 {
302 tree last_iteration = SUB_LAST_CONFLICT (subscript);
303 fprintf (outf, "\n last_conflict: ");
304 print_generic_expr (outf, last_iteration, 0);
305 }
306
307 cf = SUB_CONFLICTS_IN_B (subscript);
308 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
309 dump_conflict_function (outf, cf);
310 if (CF_NONTRIVIAL_P (cf))
311 {
312 tree last_iteration = SUB_LAST_CONFLICT (subscript);
313 fprintf (outf, "\n last_conflict: ");
314 print_generic_expr (outf, last_iteration, 0);
315 }
316
317 fprintf (outf, "\n (Subscript distance: ");
318 print_generic_expr (outf, SUB_DISTANCE (subscript), 0);
319 fprintf (outf, " ))\n");
320 }
321
322 /* Print the classic direction vector DIRV to OUTF. */
323
324 DEBUG_FUNCTION void
325 print_direction_vector (FILE *outf,
326 lambda_vector dirv,
327 int length)
328 {
329 int eq;
330
331 for (eq = 0; eq < length; eq++)
332 {
333 enum data_dependence_direction dir = ((enum data_dependence_direction)
334 dirv[eq]);
335
336 switch (dir)
337 {
338 case dir_positive:
339 fprintf (outf, " +");
340 break;
341 case dir_negative:
342 fprintf (outf, " -");
343 break;
344 case dir_equal:
345 fprintf (outf, " =");
346 break;
347 case dir_positive_or_equal:
348 fprintf (outf, " +=");
349 break;
350 case dir_positive_or_negative:
351 fprintf (outf, " +-");
352 break;
353 case dir_negative_or_equal:
354 fprintf (outf, " -=");
355 break;
356 case dir_star:
357 fprintf (outf, " *");
358 break;
359 default:
360 fprintf (outf, "indep");
361 break;
362 }
363 }
364 fprintf (outf, "\n");
365 }
366
367 /* Print a vector of direction vectors. */
368
369 DEBUG_FUNCTION void
370 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
371 int length)
372 {
373 unsigned j;
374 lambda_vector v;
375
376 FOR_EACH_VEC_ELT (dir_vects, j, v)
377 print_direction_vector (outf, v, length);
378 }
379
380 /* Print out a vector VEC of length N to OUTFILE. */
381
382 DEBUG_FUNCTION void
383 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
384 {
385 int i;
386
387 for (i = 0; i < n; i++)
388 fprintf (outfile, "%3d ", vector[i]);
389 fprintf (outfile, "\n");
390 }
391
392 /* Print a vector of distance vectors. */
393
394 DEBUG_FUNCTION void
395 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
396 int length)
397 {
398 unsigned j;
399 lambda_vector v;
400
401 FOR_EACH_VEC_ELT (dist_vects, j, v)
402 print_lambda_vector (outf, v, length);
403 }
404
405 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
406
407 DEBUG_FUNCTION void
408 dump_data_dependence_relation (FILE *outf,
409 struct data_dependence_relation *ddr)
410 {
411 struct data_reference *dra, *drb;
412
413 fprintf (outf, "(Data Dep: \n");
414
415 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
416 {
417 if (ddr)
418 {
419 dra = DDR_A (ddr);
420 drb = DDR_B (ddr);
421 if (dra)
422 dump_data_reference (outf, dra);
423 else
424 fprintf (outf, " (nil)\n");
425 if (drb)
426 dump_data_reference (outf, drb);
427 else
428 fprintf (outf, " (nil)\n");
429 }
430 fprintf (outf, " (don't know)\n)\n");
431 return;
432 }
433
434 dra = DDR_A (ddr);
435 drb = DDR_B (ddr);
436 dump_data_reference (outf, dra);
437 dump_data_reference (outf, drb);
438
439 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
440 fprintf (outf, " (no dependence)\n");
441
442 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
443 {
444 unsigned int i;
445 struct loop *loopi;
446
447 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
448 {
449 fprintf (outf, " access_fn_A: ");
450 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
451 fprintf (outf, " access_fn_B: ");
452 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
453 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
454 }
455
456 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
457 fprintf (outf, " loop nest: (");
458 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
459 fprintf (outf, "%d ", loopi->num);
460 fprintf (outf, ")\n");
461
462 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
463 {
464 fprintf (outf, " distance_vector: ");
465 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
466 DDR_NB_LOOPS (ddr));
467 }
468
469 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
470 {
471 fprintf (outf, " direction_vector: ");
472 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
473 DDR_NB_LOOPS (ddr));
474 }
475 }
476
477 fprintf (outf, ")\n");
478 }
479
480 /* Debug version. */
481
482 DEBUG_FUNCTION void
483 debug_data_dependence_relation (struct data_dependence_relation *ddr)
484 {
485 dump_data_dependence_relation (stderr, ddr);
486 }
487
488 /* Dump into FILE all the dependence relations from DDRS. */
489
490 DEBUG_FUNCTION void
491 dump_data_dependence_relations (FILE *file,
492 vec<ddr_p> ddrs)
493 {
494 unsigned int i;
495 struct data_dependence_relation *ddr;
496
497 FOR_EACH_VEC_ELT (ddrs, i, ddr)
498 dump_data_dependence_relation (file, ddr);
499 }
500
501 DEBUG_FUNCTION void
502 debug (vec<ddr_p> &ref)
503 {
504 dump_data_dependence_relations (stderr, ref);
505 }
506
507 DEBUG_FUNCTION void
508 debug (vec<ddr_p> *ptr)
509 {
510 if (ptr)
511 debug (*ptr);
512 else
513 fprintf (stderr, "<nil>\n");
514 }
515
516
517 /* Dump to STDERR all the dependence relations from DDRS. */
518
519 DEBUG_FUNCTION void
520 debug_data_dependence_relations (vec<ddr_p> ddrs)
521 {
522 dump_data_dependence_relations (stderr, ddrs);
523 }
524
525 /* Dumps the distance and direction vectors in FILE. DDRS contains
526 the dependence relations. The size of each dependence vector is
527 the number of loops in the considered nest, as given by
528 DDR_NB_LOOPS. */
529
530 DEBUG_FUNCTION void
531 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
532 {
533 unsigned int i, j;
534 struct data_dependence_relation *ddr;
535 lambda_vector v;
536
537 FOR_EACH_VEC_ELT (ddrs, i, ddr)
538 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
539 {
540 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
541 {
542 fprintf (file, "DISTANCE_V (");
543 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
544 fprintf (file, ")\n");
545 }
546
547 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
548 {
549 fprintf (file, "DIRECTION_V (");
550 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
551 fprintf (file, ")\n");
552 }
553 }
554
555 fprintf (file, "\n\n");
556 }
557
558 /* Dumps the data dependence relations DDRS in FILE. */
559
560 DEBUG_FUNCTION void
561 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
562 {
563 unsigned int i;
564 struct data_dependence_relation *ddr;
565
566 FOR_EACH_VEC_ELT (ddrs, i, ddr)
567 dump_data_dependence_relation (file, ddr);
568
569 fprintf (file, "\n\n");
570 }
571
572 DEBUG_FUNCTION void
573 debug_ddrs (vec<ddr_p> ddrs)
574 {
575 dump_ddrs (stderr, ddrs);
576 }
577
578 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
579 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
580 constant of type ssizetype, and returns true. If we cannot do this
581 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
582 is returned. */
583
584 static bool
585 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
586 tree *var, tree *off)
587 {
588 tree var0, var1;
589 tree off0, off1;
590 enum tree_code ocode = code;
591
592 *var = NULL_TREE;
593 *off = NULL_TREE;
594
595 switch (code)
596 {
597 case INTEGER_CST:
598 *var = build_int_cst (type, 0);
599 *off = fold_convert (ssizetype, op0);
600 return true;
601
602 case POINTER_PLUS_EXPR:
603 ocode = PLUS_EXPR;
604 /* FALLTHROUGH */
605 case PLUS_EXPR:
606 case MINUS_EXPR:
607 split_constant_offset (op0, &var0, &off0);
608 split_constant_offset (op1, &var1, &off1);
609 *var = fold_build2 (code, type, var0, var1);
610 *off = size_binop (ocode, off0, off1);
611 return true;
612
613 case MULT_EXPR:
614 if (TREE_CODE (op1) != INTEGER_CST)
615 return false;
616
617 split_constant_offset (op0, &var0, &off0);
618 *var = fold_build2 (MULT_EXPR, type, var0, op1);
619 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
620 return true;
621
622 case ADDR_EXPR:
623 {
624 tree base, poffset;
625 HOST_WIDE_INT pbitsize, pbitpos;
626 machine_mode pmode;
627 int punsignedp, pvolatilep;
628
629 op0 = TREE_OPERAND (op0, 0);
630 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
631 &pmode, &punsignedp, &pvolatilep, false);
632
633 if (pbitpos % BITS_PER_UNIT != 0)
634 return false;
635 base = build_fold_addr_expr (base);
636 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
637
638 if (poffset)
639 {
640 split_constant_offset (poffset, &poffset, &off1);
641 off0 = size_binop (PLUS_EXPR, off0, off1);
642 if (POINTER_TYPE_P (TREE_TYPE (base)))
643 base = fold_build_pointer_plus (base, poffset);
644 else
645 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
646 fold_convert (TREE_TYPE (base), poffset));
647 }
648
649 var0 = fold_convert (type, base);
650
651 /* If variable-length types are involved, punt; otherwise casts
652 might be converted into ARRAY_REFs in gimplify_conversion, and
653 computing that ARRAY_REF's element size would require TYPE_SIZE_UNIT
654 trees that possibly no longer appear in the current GIMPLE.
655 This perhaps could run
656 if (CONVERT_EXPR_P (var0))
657 {
658 gimplify_conversion (&var0);
659 // Attempt to fill in any within var0 found ARRAY_REF's
660 // element size from corresponding op embedded ARRAY_REF,
661 // if unsuccessful, just punt.
662 } */
663 while (POINTER_TYPE_P (type))
664 type = TREE_TYPE (type);
665 if (int_size_in_bytes (type) < 0)
666 return false;
667
668 *var = var0;
669 *off = off0;
670 return true;
671 }
672
673 case SSA_NAME:
674 {
675 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
676 return false;
677
678 gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
679 enum tree_code subcode;
680
681 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
682 return false;
683
684 var0 = gimple_assign_rhs1 (def_stmt);
685 subcode = gimple_assign_rhs_code (def_stmt);
686 var1 = gimple_assign_rhs2 (def_stmt);
687
688 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
689 }
690 CASE_CONVERT:
691 {
692 /* We must not introduce undefined overflow, and we must not change the value.
693 Hence we're okay if the inner type doesn't overflow to start with
694 (pointer or signed), the outer type also is an integer or pointer
695 and the outer precision is at least as large as the inner. */
696 tree itype = TREE_TYPE (op0);
697 if ((POINTER_TYPE_P (itype)
698 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
699 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
700 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
701 {
702 split_constant_offset (op0, &var0, off);
703 *var = fold_convert (type, var0);
704 return true;
705 }
706 return false;
707 }
708
709 default:
710 return false;
711 }
712 }
713
714 /* Expresses EXP as VAR + OFF, where OFF is a constant. The type of OFF
715 will be ssizetype. */
716
717 void
718 split_constant_offset (tree exp, tree *var, tree *off)
719 {
720 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
721 enum tree_code code;
722
723 *var = exp;
724 *off = ssize_int (0);
725 STRIP_NOPS (exp);
726
727 if (tree_is_chrec (exp)
728 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
729 return;
730
731 otype = TREE_TYPE (exp);
732 code = TREE_CODE (exp);
733 extract_ops_from_tree (exp, &code, &op0, &op1);
734 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
735 {
736 *var = fold_convert (type, e);
737 *off = o;
738 }
739 }
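
/* A minimal sketch of the decomposition (the names below are
   hypothetical): for EXP == p_1 + 16, a POINTER_PLUS_EXPR on an SSA
   pointer p_1 whose definition cannot itself be split further,

   | tree var, off;
   | split_constant_offset (exp, &var, &off);

   yields VAR == p_1 and OFF == 16.  For EXP == &a[4], with "a" a
   global array of 4-byte integers, it yields VAR == &a and OFF == 16.
   When EXP cannot be split, VAR is EXP itself and OFF is 0.  */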
740
741 /* Returns the address ADDR of an object in a canonical shape (without nop
742 casts, and with type of pointer to the object). */
743
744 static tree
745 canonicalize_base_object_address (tree addr)
746 {
747 tree orig = addr;
748
749 STRIP_NOPS (addr);
750
751 /* The base address may be obtained by casting from integer, in that case
752 keep the cast. */
753 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
754 return orig;
755
756 if (TREE_CODE (addr) != ADDR_EXPR)
757 return addr;
758
759 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
760 }
761
762 /* Analyzes the behavior of the memory reference DR in the innermost loop or
763 basic block that contains it. Returns true if the analysis succeeded,
764 false otherwise. */
765
766 bool
767 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
768 {
769 gimple *stmt = DR_STMT (dr);
770 struct loop *loop = loop_containing_stmt (stmt);
771 tree ref = DR_REF (dr);
772 HOST_WIDE_INT pbitsize, pbitpos;
773 tree base, poffset;
774 machine_mode pmode;
775 int punsignedp, pvolatilep;
776 affine_iv base_iv, offset_iv;
777 tree init, dinit, step;
778 bool in_loop = (loop && loop->num);
779
780 if (dump_file && (dump_flags & TDF_DETAILS))
781 fprintf (dump_file, "analyze_innermost: ");
782
783 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
784 &pmode, &punsignedp, &pvolatilep, false);
785 gcc_assert (base != NULL_TREE);
786
787 if (pbitpos % BITS_PER_UNIT != 0)
788 {
789 if (dump_file && (dump_flags & TDF_DETAILS))
790 fprintf (dump_file, "failed: bit offset alignment.\n");
791 return false;
792 }
793
794 if (TREE_CODE (base) == MEM_REF)
795 {
796 if (!integer_zerop (TREE_OPERAND (base, 1)))
797 {
798 offset_int moff = mem_ref_offset (base);
799 tree mofft = wide_int_to_tree (sizetype, moff);
800 if (!poffset)
801 poffset = mofft;
802 else
803 poffset = size_binop (PLUS_EXPR, poffset, mofft);
804 }
805 base = TREE_OPERAND (base, 0);
806 }
807 else
808 base = build_fold_addr_expr (base);
809
810 if (in_loop)
811 {
812 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
813 nest ? true : false))
814 {
815 if (nest)
816 {
817 if (dump_file && (dump_flags & TDF_DETAILS))
818 fprintf (dump_file, "failed: evolution of base is not"
819 " affine.\n");
820 return false;
821 }
822 else
823 {
824 base_iv.base = base;
825 base_iv.step = ssize_int (0);
826 base_iv.no_overflow = true;
827 }
828 }
829 }
830 else
831 {
832 base_iv.base = base;
833 base_iv.step = ssize_int (0);
834 base_iv.no_overflow = true;
835 }
836
837 if (!poffset)
838 {
839 offset_iv.base = ssize_int (0);
840 offset_iv.step = ssize_int (0);
841 }
842 else
843 {
844 if (!in_loop)
845 {
846 offset_iv.base = poffset;
847 offset_iv.step = ssize_int (0);
848 }
849 else if (!simple_iv (loop, loop_containing_stmt (stmt),
850 poffset, &offset_iv,
851 nest ? true : false))
852 {
853 if (nest)
854 {
855 if (dump_file && (dump_flags & TDF_DETAILS))
856 fprintf (dump_file, "failed: evolution of offset is not"
857 " affine.\n");
858 return false;
859 }
860 else
861 {
862 offset_iv.base = poffset;
863 offset_iv.step = ssize_int (0);
864 }
865 }
866 }
867
868 init = ssize_int (pbitpos / BITS_PER_UNIT);
869 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
870 init = size_binop (PLUS_EXPR, init, dinit);
871 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
872 init = size_binop (PLUS_EXPR, init, dinit);
873
874 step = size_binop (PLUS_EXPR,
875 fold_convert (ssizetype, base_iv.step),
876 fold_convert (ssizetype, offset_iv.step));
877
878 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
879
880 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
881 DR_INIT (dr) = init;
882 DR_STEP (dr) = step;
883
884 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
885
886 if (dump_file && (dump_flags & TDF_DETAILS))
887 fprintf (dump_file, "success.\n");
888
889 return true;
890 }
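
/* A minimal sketch of the result (hypothetical loop and declarations):
   for the read a[i + 4] in

   | for (i = 0; i < n; i++)
   |   ... = a[i + 4];

   with "a" a global array of 4-byte integers, the analysis above
   produces roughly

   | DR_BASE_ADDRESS: &a
   | DR_OFFSET:       0
   | DR_INIT:         16
   | DR_STEP:         4

   i.e. the address accessed at iteration I is
   BASE_ADDRESS + OFFSET + INIT + STEP * I.  */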
891
892 /* Determines the base object and the list of indices of memory reference
893 DR, analyzed in LOOP and instantiated in loop nest NEST. */
894
895 static void
896 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
897 {
898 vec<tree> access_fns = vNULL;
899 tree ref, op;
900 tree base, off, access_fn;
901 basic_block before_loop;
902
903 /* If analyzing a basic-block there are no indices to analyze
904 and thus no access functions. */
905 if (!nest)
906 {
907 DR_BASE_OBJECT (dr) = DR_REF (dr);
908 DR_ACCESS_FNS (dr).create (0);
909 return;
910 }
911
912 ref = DR_REF (dr);
913 before_loop = block_before_loop (nest);
914
915 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
916 into a two element array with a constant index. The base is
917 then just the immediate underlying object. */
918 if (TREE_CODE (ref) == REALPART_EXPR)
919 {
920 ref = TREE_OPERAND (ref, 0);
921 access_fns.safe_push (integer_zero_node);
922 }
923 else if (TREE_CODE (ref) == IMAGPART_EXPR)
924 {
925 ref = TREE_OPERAND (ref, 0);
926 access_fns.safe_push (integer_one_node);
927 }
928
929 /* Analyze access functions of dimensions we know to be independent. */
930 while (handled_component_p (ref))
931 {
932 if (TREE_CODE (ref) == ARRAY_REF)
933 {
934 op = TREE_OPERAND (ref, 1);
935 access_fn = analyze_scalar_evolution (loop, op);
936 access_fn = instantiate_scev (before_loop, loop, access_fn);
937 access_fns.safe_push (access_fn);
938 }
939 else if (TREE_CODE (ref) == COMPONENT_REF
940 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
941 {
942 /* For COMPONENT_REFs of records (but not unions!) use the
943 FIELD_DECL offset as constant access function so we can
944 disambiguate a[i].f1 and a[i].f2. */
945 tree off = component_ref_field_offset (ref);
946 off = size_binop (PLUS_EXPR,
947 size_binop (MULT_EXPR,
948 fold_convert (bitsizetype, off),
949 bitsize_int (BITS_PER_UNIT)),
950 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
951 access_fns.safe_push (off);
952 }
953 else
954 /* If we have an unhandled component we could not translate
955 to an access function stop analyzing. We have determined
956 our base object in this case. */
957 break;
958
959 ref = TREE_OPERAND (ref, 0);
960 }
961
962 /* If the address operand of a MEM_REF base has an evolution in the
963 analyzed nest, add it as an additional independent access-function. */
964 if (TREE_CODE (ref) == MEM_REF)
965 {
966 op = TREE_OPERAND (ref, 0);
967 access_fn = analyze_scalar_evolution (loop, op);
968 access_fn = instantiate_scev (before_loop, loop, access_fn);
969 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
970 {
971 tree orig_type;
972 tree memoff = TREE_OPERAND (ref, 1);
973 base = initial_condition (access_fn);
974 orig_type = TREE_TYPE (base);
975 STRIP_USELESS_TYPE_CONVERSION (base);
976 split_constant_offset (base, &base, &off);
977 STRIP_USELESS_TYPE_CONVERSION (base);
978 /* Fold the MEM_REF offset into the evolution's initial
979 value to make more bases comparable. */
980 if (!integer_zerop (memoff))
981 {
982 off = size_binop (PLUS_EXPR, off,
983 fold_convert (ssizetype, memoff));
984 memoff = build_int_cst (TREE_TYPE (memoff), 0);
985 }
986 /* Adjust the offset so it is a multiple of the access type
987 size and thus we separate bases that can possibly be used
988 to produce partial overlaps (which the access_fn machinery
989 cannot handle). */
990 wide_int rem;
991 if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
992 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
993 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
994 rem = wi::mod_trunc (off, TYPE_SIZE_UNIT (TREE_TYPE (ref)), SIGNED);
995 else
996 /* If we can't compute the remainder simply force the initial
997 condition to zero. */
998 rem = off;
999 off = wide_int_to_tree (ssizetype, wi::sub (off, rem));
1000 memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1001 /* And finally replace the initial condition. */
1002 access_fn = chrec_replace_initial_condition
1003 (access_fn, fold_convert (orig_type, off));
1004 /* ??? This is still not a suitable base object for
1005 dr_may_alias_p - the base object needs to be an
1006 access that covers the object as a whole. With
1007 an evolution in the pointer this cannot be
1008 guaranteed.
1009 As a band-aid, mark the access so we can special-case
1010 it in dr_may_alias_p. */
1011 tree old = ref;
1012 ref = fold_build2_loc (EXPR_LOCATION (ref),
1013 MEM_REF, TREE_TYPE (ref),
1014 base, memoff);
1015 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1016 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1017 DR_UNCONSTRAINED_BASE (dr) = true;
1018 access_fns.safe_push (access_fn);
1019 }
1020 }
1021 else if (DECL_P (ref))
1022 {
1023 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1024 ref = build2 (MEM_REF, TREE_TYPE (ref),
1025 build_fold_addr_expr (ref),
1026 build_int_cst (reference_alias_ptr_type (ref), 0));
1027 }
1028
1029 DR_BASE_OBJECT (dr) = ref;
1030 DR_ACCESS_FNS (dr) = access_fns;
1031 }
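
/* A minimal sketch (hypothetical reference): for b[i].f accessed
   inside the analyzed nest, with "b" an array of structures, the walk
   above typically records

   | DR_BASE_OBJECT:    b, canonicalized to MEM_REF form
   | access function 0: the constant bit offset of field f
   |                    (from the COMPONENT_REF handling)
   | access function 1: {0, +, 1}_1 for the index i
   |                    (from the ARRAY_REF handling)

   The order of the access functions reflects the outermost-to-
   innermost walk over the handled components of the reference.  */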
1032
1033 /* Extracts the alias analysis information from the memory reference DR. */
1034
1035 static void
1036 dr_analyze_alias (struct data_reference *dr)
1037 {
1038 tree ref = DR_REF (dr);
1039 tree base = get_base_address (ref), addr;
1040
1041 if (INDIRECT_REF_P (base)
1042 || TREE_CODE (base) == MEM_REF)
1043 {
1044 addr = TREE_OPERAND (base, 0);
1045 if (TREE_CODE (addr) == SSA_NAME)
1046 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1047 }
1048 }
1049
1050 /* Frees data reference DR. */
1051
1052 void
1053 free_data_ref (data_reference_p dr)
1054 {
1055 DR_ACCESS_FNS (dr).release ();
1056 free (dr);
1057 }
1058
1059 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1060 is a read if IS_READ is true, a write otherwise. Returns the
1061 data_reference description of MEMREF. NEST is the outermost loop
1062 in which the reference should be instantiated, LOOP is the loop in
1063 which the data reference should be analyzed. */
1064
1065 struct data_reference *
1066 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple *stmt,
1067 bool is_read)
1068 {
1069 struct data_reference *dr;
1070
1071 if (dump_file && (dump_flags & TDF_DETAILS))
1072 {
1073 fprintf (dump_file, "Creating dr for ");
1074 print_generic_expr (dump_file, memref, TDF_SLIM);
1075 fprintf (dump_file, "\n");
1076 }
1077
1078 dr = XCNEW (struct data_reference);
1079 DR_STMT (dr) = stmt;
1080 DR_REF (dr) = memref;
1081 DR_IS_READ (dr) = is_read;
1082
1083 dr_analyze_innermost (dr, nest);
1084 dr_analyze_indices (dr, nest, loop);
1085 dr_analyze_alias (dr);
1086
1087 if (dump_file && (dump_flags & TDF_DETAILS))
1088 {
1089 unsigned i;
1090 fprintf (dump_file, "\tbase_address: ");
1091 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1092 fprintf (dump_file, "\n\toffset from base address: ");
1093 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1094 fprintf (dump_file, "\n\tconstant offset from base address: ");
1095 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1096 fprintf (dump_file, "\n\tstep: ");
1097 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1098 fprintf (dump_file, "\n\taligned to: ");
1099 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1100 fprintf (dump_file, "\n\tbase_object: ");
1101 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1102 fprintf (dump_file, "\n");
1103 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1104 {
1105 fprintf (dump_file, "\tAccess function %d: ", i);
1106 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1107 }
1108 }
1109
1110 return dr;
1111 }
1112
1113 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1114 expressions. */
1115 static bool
1116 dr_equal_offsets_p1 (tree offset1, tree offset2)
1117 {
1118 bool res;
1119
1120 STRIP_NOPS (offset1);
1121 STRIP_NOPS (offset2);
1122
1123 if (offset1 == offset2)
1124 return true;
1125
1126 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1127 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1128 return false;
1129
1130 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1131 TREE_OPERAND (offset2, 0));
1132
1133 if (!res || !BINARY_CLASS_P (offset1))
1134 return res;
1135
1136 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1137 TREE_OPERAND (offset2, 1));
1138
1139 return res;
1140 }
1141
1142 /* Check if DRA and DRB have equal offsets. */
1143 bool
1144 dr_equal_offsets_p (struct data_reference *dra,
1145 struct data_reference *drb)
1146 {
1147 tree offset1, offset2;
1148
1149 offset1 = DR_OFFSET (dra);
1150 offset2 = DR_OFFSET (drb);
1151
1152 return dr_equal_offsets_p1 (offset1, offset2);
1153 }
1154
1155 /* Returns true if FNA == FNB. */
1156
1157 static bool
1158 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1159 {
1160 unsigned i, n = fna.length ();
1161
1162 if (n != fnb.length ())
1163 return false;
1164
1165 for (i = 0; i < n; i++)
1166 if (!operand_equal_p (fna[i], fnb[i], 0))
1167 return false;
1168
1169 return true;
1170 }
1171
1172 /* If all the functions in CF are the same, returns one of them,
1173 otherwise returns a NULL affine_fn, i.e. one for which exists () is false. */
1174
1175 static affine_fn
1176 common_affine_function (conflict_function *cf)
1177 {
1178 unsigned i;
1179 affine_fn comm;
1180
1181 if (!CF_NONTRIVIAL_P (cf))
1182 return affine_fn ();
1183
1184 comm = cf->fns[0];
1185
1186 for (i = 1; i < cf->n; i++)
1187 if (!affine_function_equal_p (comm, cf->fns[i]))
1188 return affine_fn ();
1189
1190 return comm;
1191 }
1192
1193 /* Returns the base of the affine function FN. */
1194
1195 static tree
1196 affine_function_base (affine_fn fn)
1197 {
1198 return fn[0];
1199 }
1200
1201 /* Returns true if FN is a constant. */
1202
1203 static bool
1204 affine_function_constant_p (affine_fn fn)
1205 {
1206 unsigned i;
1207 tree coef;
1208
1209 for (i = 1; fn.iterate (i, &coef); i++)
1210 if (!integer_zerop (coef))
1211 return false;
1212
1213 return true;
1214 }
1215
1216 /* Returns true if FN is the zero constant function. */
1217
1218 static bool
1219 affine_function_zero_p (affine_fn fn)
1220 {
1221 return (integer_zerop (affine_function_base (fn))
1222 && affine_function_constant_p (fn));
1223 }
1224
1225 /* Returns a signed integer type with the largest precision from TA
1226 and TB. */
1227
1228 static tree
1229 signed_type_for_types (tree ta, tree tb)
1230 {
1231 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1232 return signed_type_for (ta);
1233 else
1234 return signed_type_for (tb);
1235 }
1236
1237 /* Applies operation OP on affine functions FNA and FNB, and returns the
1238 result. */
1239
1240 static affine_fn
1241 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1242 {
1243 unsigned i, n, m;
1244 affine_fn ret;
1245 tree coef;
1246
1247 if (fnb.length () > fna.length ())
1248 {
1249 n = fna.length ();
1250 m = fnb.length ();
1251 }
1252 else
1253 {
1254 n = fnb.length ();
1255 m = fna.length ();
1256 }
1257
1258 ret.create (m);
1259 for (i = 0; i < n; i++)
1260 {
1261 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1262 TREE_TYPE (fnb[i]));
1263 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1264 }
1265
1266 for (; fna.iterate (i, &coef); i++)
1267 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1268 coef, integer_zero_node));
1269 for (; fnb.iterate (i, &coef); i++)
1270 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1271 integer_zero_node, coef));
1272
1273 return ret;
1274 }
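
/* For instance, in this representation, adding [1, 2] (i.e. 1 + 2 * x_1)
   and [3] (the constant 3) with affine_fn_plus below yields [4, 2],
   i.e. 4 + 2 * x_1; the shorter function is implicitly extended with
   zero coefficients.  */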
1275
1276 /* Returns the sum of affine functions FNA and FNB. */
1277
1278 static affine_fn
1279 affine_fn_plus (affine_fn fna, affine_fn fnb)
1280 {
1281 return affine_fn_op (PLUS_EXPR, fna, fnb);
1282 }
1283
1284 /* Returns the difference of affine functions FNA and FNB. */
1285
1286 static affine_fn
1287 affine_fn_minus (affine_fn fna, affine_fn fnb)
1288 {
1289 return affine_fn_op (MINUS_EXPR, fna, fnb);
1290 }
1291
1292 /* Frees affine function FN. */
1293
1294 static void
1295 affine_fn_free (affine_fn fn)
1296 {
1297 fn.release ();
1298 }
1299
1300 /* Determine the distance for each subscript in the data dependence
1301 relation DDR. */
1302
1303 static void
1304 compute_subscript_distance (struct data_dependence_relation *ddr)
1305 {
1306 conflict_function *cf_a, *cf_b;
1307 affine_fn fn_a, fn_b, diff;
1308
1309 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1310 {
1311 unsigned int i;
1312
1313 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1314 {
1315 struct subscript *subscript;
1316
1317 subscript = DDR_SUBSCRIPT (ddr, i);
1318 cf_a = SUB_CONFLICTS_IN_A (subscript);
1319 cf_b = SUB_CONFLICTS_IN_B (subscript);
1320
1321 fn_a = common_affine_function (cf_a);
1322 fn_b = common_affine_function (cf_b);
1323 if (!fn_a.exists () || !fn_b.exists ())
1324 {
1325 SUB_DISTANCE (subscript) = chrec_dont_know;
1326 return;
1327 }
1328 diff = affine_fn_minus (fn_a, fn_b);
1329
1330 if (affine_function_constant_p (diff))
1331 SUB_DISTANCE (subscript) = affine_function_base (diff);
1332 else
1333 SUB_DISTANCE (subscript) = chrec_dont_know;
1334
1335 affine_fn_free (diff);
1336 }
1337 }
1338 }
1339
1340 /* Returns the conflict function for "unknown". */
1341
1342 static conflict_function *
1343 conflict_fn_not_known (void)
1344 {
1345 conflict_function *fn = XCNEW (conflict_function);
1346 fn->n = NOT_KNOWN;
1347
1348 return fn;
1349 }
1350
1351 /* Returns the conflict function for "independent". */
1352
1353 static conflict_function *
1354 conflict_fn_no_dependence (void)
1355 {
1356 conflict_function *fn = XCNEW (conflict_function);
1357 fn->n = NO_DEPENDENCE;
1358
1359 return fn;
1360 }
1361
1362 /* Returns true if the address of OBJ is invariant in LOOP. */
1363
1364 static bool
1365 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1366 {
1367 while (handled_component_p (obj))
1368 {
1369 if (TREE_CODE (obj) == ARRAY_REF)
1370 {
1371 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1372 need to check the stride and the lower bound of the reference. */
1373 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1374 loop->num)
1375 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1376 loop->num))
1377 return false;
1378 }
1379 else if (TREE_CODE (obj) == COMPONENT_REF)
1380 {
1381 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1382 loop->num))
1383 return false;
1384 }
1385 obj = TREE_OPERAND (obj, 0);
1386 }
1387
1388 if (!INDIRECT_REF_P (obj)
1389 && TREE_CODE (obj) != MEM_REF)
1390 return true;
1391
1392 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1393 loop->num);
1394 }
1395
1396 /* Returns false if we can prove that data references A and B do not alias,
1397 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1398 considered. */
1399
1400 bool
1401 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1402 bool loop_nest)
1403 {
1404 tree addr_a = DR_BASE_OBJECT (a);
1405 tree addr_b = DR_BASE_OBJECT (b);
1406
1407 /* If we are not processing a loop nest but scalar code we
1408 do not need to care about possible cross-iteration dependences
1409 and thus can process the full original reference. Do so,
1410 similar to how loop invariant motion applies extra offset-based
1411 disambiguation. */
1412 if (!loop_nest)
1413 {
1414 aff_tree off1, off2;
1415 widest_int size1, size2;
1416 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1417 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1418 aff_combination_scale (&off1, -1);
1419 aff_combination_add (&off2, &off1);
1420 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1421 return false;
1422 }
1423
1424 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
1425 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
1426 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
1427 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
1428 return false;
1429
1430 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
1431 do not know the size of the base-object. So we cannot do any
1432 offset/overlap based analysis but have to rely on points-to
1433 information only. */
1434 if (TREE_CODE (addr_a) == MEM_REF
1435 && (DR_UNCONSTRAINED_BASE (a)
1436 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
1437 {
1438 /* For true dependences we can apply TBAA. */
1439 if (flag_strict_aliasing
1440 && DR_IS_WRITE (a) && DR_IS_READ (b)
1441 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1442 get_alias_set (DR_REF (b))))
1443 return false;
1444 if (TREE_CODE (addr_b) == MEM_REF)
1445 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1446 TREE_OPERAND (addr_b, 0));
1447 else
1448 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1449 build_fold_addr_expr (addr_b));
1450 }
1451 else if (TREE_CODE (addr_b) == MEM_REF
1452 && (DR_UNCONSTRAINED_BASE (b)
1453 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
1454 {
1455 /* For true dependences we can apply TBAA. */
1456 if (flag_strict_aliasing
1457 && DR_IS_WRITE (a) && DR_IS_READ (b)
1458 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1459 get_alias_set (DR_REF (b))))
1460 return false;
1461 if (TREE_CODE (addr_a) == MEM_REF)
1462 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1463 TREE_OPERAND (addr_b, 0));
1464 else
1465 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1466 TREE_OPERAND (addr_b, 0));
1467 }
1468
1469 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1470 that is being subsetted in the loop nest. */
1471 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1472 return refs_output_dependent_p (addr_a, addr_b);
1473 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1474 return refs_anti_dependent_p (addr_a, addr_b);
1475 return refs_may_alias_p (addr_a, addr_b);
1476 }
1477
1478 /* Initialize a data dependence relation between data accesses A and
1479 B. LOOP_NEST is the vector of loops surrounding the references; its
1480 length gives the size of the classic distance/direction vectors. */
1481
1482 struct data_dependence_relation *
1483 initialize_data_dependence_relation (struct data_reference *a,
1484 struct data_reference *b,
1485 vec<loop_p> loop_nest)
1486 {
1487 struct data_dependence_relation *res;
1488 unsigned int i;
1489
1490 res = XNEW (struct data_dependence_relation);
1491 DDR_A (res) = a;
1492 DDR_B (res) = b;
1493 DDR_LOOP_NEST (res).create (0);
1494 DDR_REVERSED_P (res) = false;
1495 DDR_SUBSCRIPTS (res).create (0);
1496 DDR_DIR_VECTS (res).create (0);
1497 DDR_DIST_VECTS (res).create (0);
1498
1499 if (a == NULL || b == NULL)
1500 {
1501 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1502 return res;
1503 }
1504
1505 /* If the data references do not alias, then they are independent. */
1506 if (!dr_may_alias_p (a, b, loop_nest.exists ()))
1507 {
1508 DDR_ARE_DEPENDENT (res) = chrec_known;
1509 return res;
1510 }
1511
1512 /* The case where the references are exactly the same. */
1513 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1514 {
1515 if (loop_nest.exists ()
1516 && !object_address_invariant_in_loop_p (loop_nest[0],
1517 DR_BASE_OBJECT (a)))
1518 {
1519 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1520 return res;
1521 }
1522 DDR_AFFINE_P (res) = true;
1523 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1524 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1525 DDR_LOOP_NEST (res) = loop_nest;
1526 DDR_INNER_LOOP (res) = 0;
1527 DDR_SELF_REFERENCE (res) = true;
1528 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1529 {
1530 struct subscript *subscript;
1531
1532 subscript = XNEW (struct subscript);
1533 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1534 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1535 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1536 SUB_DISTANCE (subscript) = chrec_dont_know;
1537 DDR_SUBSCRIPTS (res).safe_push (subscript);
1538 }
1539 return res;
1540 }
1541
1542 /* If the references do not access the same object, we do not know
1543 whether they alias or not. */
1544 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1545 {
1546 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1547 return res;
1548 }
1549
1550 /* If the base of the object is not invariant in the loop nest, we cannot
1551 analyze it. TODO -- in fact, it would suffice to record that there may
1552 be arbitrary dependences in the loops where the base object varies. */
1553 if (loop_nest.exists ()
1554 && !object_address_invariant_in_loop_p (loop_nest[0],
1555 DR_BASE_OBJECT (a)))
1556 {
1557 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1558 return res;
1559 }
1560
1561 /* If the numbers of dimensions of the accesses do not agree, we can have
1562 a pointer access to a component of the array element type and an
1563 array access while the base-objects are still the same. Punt. */
1564 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1565 {
1566 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1567 return res;
1568 }
1569
1570 DDR_AFFINE_P (res) = true;
1571 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1572 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1573 DDR_LOOP_NEST (res) = loop_nest;
1574 DDR_INNER_LOOP (res) = 0;
1575 DDR_SELF_REFERENCE (res) = false;
1576
1577 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1578 {
1579 struct subscript *subscript;
1580
1581 subscript = XNEW (struct subscript);
1582 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1583 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1584 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1585 SUB_DISTANCE (subscript) = chrec_dont_know;
1586 DDR_SUBSCRIPTS (res).safe_push (subscript);
1587 }
1588
1589 return res;
1590 }
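
/* A minimal usage sketch (hypothetical caller; discovery of the data
   references, of the loop nest, and the freeing of the structures are
   omitted):  */
#if 0
  struct data_reference *dra
    = create_data_ref (nest, loop, ref_a, stmt_a, /*is_read=*/true);
  struct data_reference *drb
    = create_data_ref (nest, loop, ref_b, stmt_b, /*is_read=*/false);
  struct data_dependence_relation *ddr
    = initialize_data_dependence_relation (dra, drb, loop_nest);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    ;  /* Proven independent.  */
  else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    ;  /* Analysis gave up; a dependence must be assumed.  */
  else
    ;  /* NULL_TREE: the subscripts are set up and can be refined
          by the subscript dependence testers.  */
#endif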
1591
1592 /* Frees memory used by the conflict function F. */
1593
1594 static void
1595 free_conflict_function (conflict_function *f)
1596 {
1597 unsigned i;
1598
1599 if (CF_NONTRIVIAL_P (f))
1600 {
1601 for (i = 0; i < f->n; i++)
1602 affine_fn_free (f->fns[i]);
1603 }
1604 free (f);
1605 }
1606
1607 /* Frees memory used by SUBSCRIPTS. */
1608
1609 static void
1610 free_subscripts (vec<subscript_p> subscripts)
1611 {
1612 unsigned i;
1613 subscript_p s;
1614
1615 FOR_EACH_VEC_ELT (subscripts, i, s)
1616 {
1617 free_conflict_function (s->conflicting_iterations_in_a);
1618 free_conflict_function (s->conflicting_iterations_in_b);
1619 free (s);
1620 }
1621 subscripts.release ();
1622 }
1623
1624 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1625 description. */
1626
1627 static inline void
1628 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1629 tree chrec)
1630 {
1631 DDR_ARE_DEPENDENT (ddr) = chrec;
1632 free_subscripts (DDR_SUBSCRIPTS (ddr));
1633 DDR_SUBSCRIPTS (ddr).create (0);
1634 }
1635
1636 /* The dependence relation DDR cannot be represented by a distance
1637 vector. */
1638
1639 static inline void
1640 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1641 {
1642 if (dump_file && (dump_flags & TDF_DETAILS))
1643 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1644
1645 DDR_AFFINE_P (ddr) = false;
1646 }
1647
1648 \f
1649
1650 /* This section contains the classic Banerjee tests. */
1651
1652 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1653 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1654
1655 static inline bool
1656 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1657 {
1658 return (evolution_function_is_constant_p (chrec_a)
1659 && evolution_function_is_constant_p (chrec_b));
1660 }
1661
1662 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1663 variable, i.e., if the SIV (Single Index Variable) test is true. */
1664
1665 static bool
1666 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1667 {
1668 if ((evolution_function_is_constant_p (chrec_a)
1669 && evolution_function_is_univariate_p (chrec_b))
1670 || (evolution_function_is_constant_p (chrec_b)
1671 && evolution_function_is_univariate_p (chrec_a)))
1672 return true;
1673
1674 if (evolution_function_is_univariate_p (chrec_a)
1675 && evolution_function_is_univariate_p (chrec_b))
1676 {
1677 switch (TREE_CODE (chrec_a))
1678 {
1679 case POLYNOMIAL_CHREC:
1680 switch (TREE_CODE (chrec_b))
1681 {
1682 case POLYNOMIAL_CHREC:
1683 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1684 return false;
1685
1686 default:
1687 return true;
1688 }
1689
1690 default:
1691 return true;
1692 }
1693 }
1694
1695 return false;
1696 }
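
/* For intuition, with hypothetical subscripts and loop numbers _1, _2:

   | ZIV:  (3, 5)                      e.g. A[3] versus A[5],
   |                                   no index variable at all;
   | SIV:  ({0, +, 1}_1, {2, +, 1}_1)  e.g. A[i] versus A[i + 2],
   |                                   a single index variable;
   | MIV:  ({0, +, 1}_1, {0, +, 1}_2)  e.g. A[i] versus A[j], treated
   |                                   as a Multiple Index Variable
   |                                   subscript.  */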
1697
1698 /* Creates a conflict function with N dimensions. The affine functions
1699 in each dimension follow. */
1700
1701 static conflict_function *
1702 conflict_fn (unsigned n, ...)
1703 {
1704 unsigned i;
1705 conflict_function *ret = XCNEW (conflict_function);
1706 va_list ap;
1707
1708 gcc_assert (0 < n && n <= MAX_DIM);
1709 va_start (ap, n);
1710
1711 ret->n = n;
1712 for (i = 0; i < n; i++)
1713 ret->fns[i] = va_arg (ap, affine_fn);
1714 va_end (ap);
1715
1716 return ret;
1717 }
1718
1719 /* Returns constant affine function with value CST. */
1720
1721 static affine_fn
1722 affine_fn_cst (tree cst)
1723 {
1724 affine_fn fn;
1725 fn.create (1);
1726 fn.quick_push (cst);
1727 return fn;
1728 }
1729
1730 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1731
1732 static affine_fn
1733 affine_fn_univar (tree cst, unsigned dim, tree coef)
1734 {
1735 affine_fn fn;
1736 fn.create (dim + 1);
1737 unsigned i;
1738
1739 gcc_assert (dim > 0);
1740 fn.quick_push (cst);
1741 for (i = 1; i < dim; i++)
1742 fn.quick_push (integer_zero_node);
1743 fn.quick_push (coef);
1744 return fn;
1745 }
1746
1747 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1748 *OVERLAPS_B are initialized to the functions that describe the
1749 relation between the elements accessed twice by CHREC_A and
1750 CHREC_B. For k >= 0, the following property is verified:
1751
1752 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1753
1754 static void
1755 analyze_ziv_subscript (tree chrec_a,
1756 tree chrec_b,
1757 conflict_function **overlaps_a,
1758 conflict_function **overlaps_b,
1759 tree *last_conflicts)
1760 {
1761 tree type, difference;
1762 dependence_stats.num_ziv++;
1763
1764 if (dump_file && (dump_flags & TDF_DETAILS))
1765 fprintf (dump_file, "(analyze_ziv_subscript \n");
1766
1767 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1768 chrec_a = chrec_convert (type, chrec_a, NULL);
1769 chrec_b = chrec_convert (type, chrec_b, NULL);
1770 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1771
1772 switch (TREE_CODE (difference))
1773 {
1774 case INTEGER_CST:
1775 if (integer_zerop (difference))
1776 {
1777 /* The difference is equal to zero: the accessed index
1778 overlaps for each iteration in the loop. */
1779 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1780 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1781 *last_conflicts = chrec_dont_know;
1782 dependence_stats.num_ziv_dependent++;
1783 }
1784 else
1785 {
1786 /* The accesses do not overlap. */
1787 *overlaps_a = conflict_fn_no_dependence ();
1788 *overlaps_b = conflict_fn_no_dependence ();
1789 *last_conflicts = integer_zero_node;
1790 dependence_stats.num_ziv_independent++;
1791 }
1792 break;
1793
1794 default:
1795 /* We're not sure whether the indexes overlap. For the moment,
1796 conservatively answer "don't know". */
1797 if (dump_file && (dump_flags & TDF_DETAILS))
1798 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1799
1800 *overlaps_a = conflict_fn_not_known ();
1801 *overlaps_b = conflict_fn_not_known ();
1802 *last_conflicts = chrec_dont_know;
1803 dependence_stats.num_ziv_unimplemented++;
1804 break;
1805 }
1806
1807 if (dump_file && (dump_flags & TDF_DETAILS))
1808 fprintf (dump_file, ")\n");
1809 }
1810
1811 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1812 and only if it fits the unsigned int type. If this is not the case, or the
1813 bound on the number of iterations of LOOP could not be derived, returns
1814 chrec_dont_know. */
1815
1816 static tree
1817 max_stmt_executions_tree (struct loop *loop)
1818 {
1819 widest_int nit;
1820
1821 if (!max_stmt_executions (loop, &nit))
1822 return chrec_dont_know;
1823
1824 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
1825 return chrec_dont_know;
1826
1827 return wide_int_to_tree (unsigned_type_node, nit);
1828 }
1829
1830 /* Determine whether the CHREC is always positive/negative. If the expression
1831 cannot be statically analyzed, return false, otherwise set the answer into
1832 VALUE. */
1833
1834 static bool
1835 chrec_is_positive (tree chrec, bool *value)
1836 {
1837 bool value0, value1, value2;
1838 tree end_value, nb_iter;
1839
1840 switch (TREE_CODE (chrec))
1841 {
1842 case POLYNOMIAL_CHREC:
1843 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1844 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1845 return false;
1846
1847 /* FIXME -- overflows. */
1848 if (value0 == value1)
1849 {
1850 *value = value0;
1851 return true;
1852 }
1853
1854 /* Otherwise the chrec has the form "{-197, +, 2}_1",
1855 and the proof consists in showing that the sign never
1856 changes during the execution of the loop, from 0 to
1857 loop->nb_iterations. */
1858 if (!evolution_function_is_affine_p (chrec))
1859 return false;
1860
1861 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1862 if (chrec_contains_undetermined (nb_iter))
1863 return false;
1864
1865 #if 0
1866 /* TODO -- If the test is after the exit, we may decrease the number of
1867 iterations by one. */
1868 if (after_exit)
1869 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1870 #endif
1871
1872 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1873
1874 if (!chrec_is_positive (end_value, &value2))
1875 return false;
1876
1877 *value = value0;
1878 return value0 == value1;
1879
1880 case INTEGER_CST:
1881 switch (tree_int_cst_sgn (chrec))
1882 {
1883 case -1:
1884 *value = false;
1885 break;
1886 case 1:
1887 *value = true;
1888 break;
1889 default:
1890 return false;
1891 }
1892 return true;
1893
1894 default:
1895 return false;
1896 }
1897 }
1898
1899
1900 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1901 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1902 *OVERLAPS_B are initialized to the functions that describe the
1903 relation between the elements accessed twice by CHREC_A and
1904 CHREC_B. For k >= 0, the following property is verified:
1905
1906 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1907
1908 static void
1909 analyze_siv_subscript_cst_affine (tree chrec_a,
1910 tree chrec_b,
1911 conflict_function **overlaps_a,
1912 conflict_function **overlaps_b,
1913 tree *last_conflicts)
1914 {
1915 bool value0, value1, value2;
1916 tree type, difference, tmp;
1917
1918 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1919 chrec_a = chrec_convert (type, chrec_a, NULL);
1920 chrec_b = chrec_convert (type, chrec_b, NULL);
1921 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1922
1923 /* Special case overlap in the first iteration. */
1924 if (integer_zerop (difference))
1925 {
1926 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1927 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1928 *last_conflicts = integer_one_node;
1929 return;
1930 }
1931
1932 if (!chrec_is_positive (initial_condition (difference), &value0))
1933 {
1934 if (dump_file && (dump_flags & TDF_DETAILS))
1935 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1936
1937 dependence_stats.num_siv_unimplemented++;
1938 *overlaps_a = conflict_fn_not_known ();
1939 *overlaps_b = conflict_fn_not_known ();
1940 *last_conflicts = chrec_dont_know;
1941 return;
1942 }
1943 else
1944 {
1945 if (value0 == false)
1946 {
1947 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1948 {
1949 if (dump_file && (dump_flags & TDF_DETAILS))
1950 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1951
1952 *overlaps_a = conflict_fn_not_known ();
1953 *overlaps_b = conflict_fn_not_known ();
1954 *last_conflicts = chrec_dont_know;
1955 dependence_stats.num_siv_unimplemented++;
1956 return;
1957 }
1958 else
1959 {
1960 if (value1 == true)
1961 {
1962 /* Example:
1963 chrec_a = 12
1964 chrec_b = {10, +, 1}
1965 */
1966
1967 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1968 {
1969 HOST_WIDE_INT numiter;
1970 struct loop *loop = get_chrec_loop (chrec_b);
1971
1972 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1973 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1974 fold_build1 (ABS_EXPR, type, difference),
1975 CHREC_RIGHT (chrec_b));
1976 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1977 *last_conflicts = integer_one_node;
1978
1979
1980 /* Perform weak-zero siv test to see if overlap is
1981 outside the loop bounds. */
1982 numiter = max_stmt_executions_int (loop);
1983
1984 if (numiter >= 0
1985 && compare_tree_int (tmp, numiter) > 0)
1986 {
1987 free_conflict_function (*overlaps_a);
1988 free_conflict_function (*overlaps_b);
1989 *overlaps_a = conflict_fn_no_dependence ();
1990 *overlaps_b = conflict_fn_no_dependence ();
1991 *last_conflicts = integer_zero_node;
1992 dependence_stats.num_siv_independent++;
1993 return;
1994 }
1995 dependence_stats.num_siv_dependent++;
1996 return;
1997 }
1998
1999 /* When the step does not divide the difference, there are
2000 no overlaps. */
2001 else
2002 {
2003 *overlaps_a = conflict_fn_no_dependence ();
2004 *overlaps_b = conflict_fn_no_dependence ();
2005 *last_conflicts = integer_zero_node;
2006 dependence_stats.num_siv_independent++;
2007 return;
2008 }
2009 }
2010
2011 else
2012 {
2013 /* Example:
2014 chrec_a = 12
2015 chrec_b = {10, +, -1}
2016
2017 In this case, chrec_a will not overlap with chrec_b. */
2018 *overlaps_a = conflict_fn_no_dependence ();
2019 *overlaps_b = conflict_fn_no_dependence ();
2020 *last_conflicts = integer_zero_node;
2021 dependence_stats.num_siv_independent++;
2022 return;
2023 }
2024 }
2025 }
2026 else
2027 {
2028 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
2029 {
2030 if (dump_file && (dump_flags & TDF_DETAILS))
2031 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2032
2033 *overlaps_a = conflict_fn_not_known ();
2034 *overlaps_b = conflict_fn_not_known ();
2035 *last_conflicts = chrec_dont_know;
2036 dependence_stats.num_siv_unimplemented++;
2037 return;
2038 }
2039 else
2040 {
2041 if (value2 == false)
2042 {
2043 /* Example:
2044 chrec_a = 3
2045 chrec_b = {10, +, -1}
2046 */
2047 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
2048 {
2049 HOST_WIDE_INT numiter;
2050 struct loop *loop = get_chrec_loop (chrec_b);
2051
2052 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2053 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
2054 CHREC_RIGHT (chrec_b));
2055 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
2056 *last_conflicts = integer_one_node;
2057
2058 /* Perform weak-zero siv test to see if overlap is
2059 outside the loop bounds. */
2060 numiter = max_stmt_executions_int (loop);
2061
2062 if (numiter >= 0
2063 && compare_tree_int (tmp, numiter) > 0)
2064 {
2065 free_conflict_function (*overlaps_a);
2066 free_conflict_function (*overlaps_b);
2067 *overlaps_a = conflict_fn_no_dependence ();
2068 *overlaps_b = conflict_fn_no_dependence ();
2069 *last_conflicts = integer_zero_node;
2070 dependence_stats.num_siv_independent++;
2071 return;
2072 }
2073 dependence_stats.num_siv_dependent++;
2074 return;
2075 }
2076
2077 /* When the step does not divide the difference, there
2078 are no overlaps. */
2079 else
2080 {
2081 *overlaps_a = conflict_fn_no_dependence ();
2082 *overlaps_b = conflict_fn_no_dependence ();
2083 *last_conflicts = integer_zero_node;
2084 dependence_stats.num_siv_independent++;
2085 return;
2086 }
2087 }
2088 else
2089 {
2090 /* Example:
2091 chrec_a = 3
2092 chrec_b = {4, +, 1}
2093
2094 In this case, chrec_a will not overlap with chrec_b. */
2095 *overlaps_a = conflict_fn_no_dependence ();
2096 *overlaps_b = conflict_fn_no_dependence ();
2097 *last_conflicts = integer_zero_node;
2098 dependence_stats.num_siv_independent++;
2099 return;
2100 }
2101 }
2102 }
2103 }
2104 }
2105
2106 /* Helper recursive function for initializing the matrix A. Returns
2107 the initial value of CHREC. */
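/* For example (an illustrative instance, not taken from the original
   comments): with CHREC = {{3, +, 5}_1, +, 7}_2, INDEX = 0 and MULT = 1,
   the recursion stores A[0][0] = 7 and A[1][0] = 5, and returns the
   initial value 3.  */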
2108
2109 static tree
2110 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2111 {
2112 gcc_assert (chrec);
2113
2114 switch (TREE_CODE (chrec))
2115 {
2116 case POLYNOMIAL_CHREC:
2117 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2118
2119 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2120 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2121
2122 case PLUS_EXPR:
2123 case MULT_EXPR:
2124 case MINUS_EXPR:
2125 {
2126 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2127 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2128
2129 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2130 }
2131
2132 CASE_CONVERT:
2133 {
2134 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2135 return chrec_convert (chrec_type (chrec), op, NULL);
2136 }
2137
2138 case BIT_NOT_EXPR:
2139 {
2140 /* Handle ~X as -1 - X. */
2141 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2142 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2143 build_int_cst (TREE_TYPE (chrec), -1), op);
2144 }
2145
2146 case INTEGER_CST:
2147 return chrec;
2148
2149 default:
2150 gcc_unreachable ();
2151 return NULL_TREE;
2152 }
2153 }
2154
2155 #define FLOOR_DIV(x,y) ((x) / (y))
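/* Note: C integer division truncates toward zero, so FLOOR_DIV only
   matches a mathematical floor when the quotient is non-negative,
   e.g. FLOOR_DIV (7, 2) == 3, but FLOOR_DIV (-7, 2) == -3, not -4.  */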
2156
2157 /* Solves the special case of the Diophantine equation:
2158 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2159
2160 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2161 number of iterations that loops X and Y run. The overlaps will be
2162 constructed as evolutions in dimension DIM. */
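/* For instance, with STEP_A = 6 and STEP_B = 4 the GCD is 2, so the
   conflicting iterations are OVERLAPS_A (k) = 2 * k and
   OVERLAPS_B (k) = 3 * k: indeed {0, +, 6}_x (2 * k) = 12 * k
   = {0, +, 4}_y (3 * k) for every k >= 0.  */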
2163
2164 static void
2165 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2166 affine_fn *overlaps_a,
2167 affine_fn *overlaps_b,
2168 tree *last_conflicts, int dim)
2169 {
2170 if (((step_a > 0 && step_b > 0)
2171 || (step_a < 0 && step_b < 0)))
2172 {
2173 int step_overlaps_a, step_overlaps_b;
2174 int gcd_steps_a_b, last_conflict, tau2;
2175
2176 gcd_steps_a_b = gcd (step_a, step_b);
2177 step_overlaps_a = step_b / gcd_steps_a_b;
2178 step_overlaps_b = step_a / gcd_steps_a_b;
2179
2180 if (niter > 0)
2181 {
2182 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2183 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2184 last_conflict = tau2;
2185 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2186 }
2187 else
2188 *last_conflicts = chrec_dont_know;
2189
2190 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2191 build_int_cst (NULL_TREE,
2192 step_overlaps_a));
2193 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2194 build_int_cst (NULL_TREE,
2195 step_overlaps_b));
2196 }
2197
2198 else
2199 {
2200 *overlaps_a = affine_fn_cst (integer_zero_node);
2201 *overlaps_b = affine_fn_cst (integer_zero_node);
2202 *last_conflicts = integer_zero_node;
2203 }
2204 }
2205
2206 /* Solves the special case of a Diophantine equation where CHREC_A is
2207 an affine bivariate function, and CHREC_B is an affine univariate
2208 function. For example,
2209
2210 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2211
2212 has the following overlapping functions:
2213
2214 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2215 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2216 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2217
2218 FORNOW: This is a specialized implementation for a case occurring in
2219 a common benchmark. Implement the general algorithm. */
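/* As a quick check of the example above, take (t, u, v) = (1, 1, 0):
   the overlap functions give x = 1336, y = 1336 and z = 1 + 1335 = 1336,
   and indeed {{0, +, 1}_x, +, 1335}_y (1336, 1336)
   = 1336 + 1335 * 1336 = 1336 * 1336 = {0, +, 1336}_z (1336).  */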
2220
2221 static void
2222 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2223 conflict_function **overlaps_a,
2224 conflict_function **overlaps_b,
2225 tree *last_conflicts)
2226 {
2227 bool xz_p, yz_p, xyz_p;
2228 int step_x, step_y, step_z;
2229 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2230 affine_fn overlaps_a_xz, overlaps_b_xz;
2231 affine_fn overlaps_a_yz, overlaps_b_yz;
2232 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2233 affine_fn ova1, ova2, ovb;
2234 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2235
2236 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2237 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2238 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2239
2240 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2241 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2242 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2243
2244 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2245 {
2246 if (dump_file && (dump_flags & TDF_DETAILS))
2247 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2248
2249 *overlaps_a = conflict_fn_not_known ();
2250 *overlaps_b = conflict_fn_not_known ();
2251 *last_conflicts = chrec_dont_know;
2252 return;
2253 }
2254
2255 niter = MIN (niter_x, niter_z);
2256 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2257 &overlaps_a_xz,
2258 &overlaps_b_xz,
2259 &last_conflicts_xz, 1);
2260 niter = MIN (niter_y, niter_z);
2261 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2262 &overlaps_a_yz,
2263 &overlaps_b_yz,
2264 &last_conflicts_yz, 2);
2265 niter = MIN (niter_x, niter_z);
2266 niter = MIN (niter_y, niter);
2267 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2268 &overlaps_a_xyz,
2269 &overlaps_b_xyz,
2270 &last_conflicts_xyz, 3);
2271
2272 xz_p = !integer_zerop (last_conflicts_xz);
2273 yz_p = !integer_zerop (last_conflicts_yz);
2274 xyz_p = !integer_zerop (last_conflicts_xyz);
2275
2276 if (xz_p || yz_p || xyz_p)
2277 {
2278 ova1 = affine_fn_cst (integer_zero_node);
2279 ova2 = affine_fn_cst (integer_zero_node);
2280 ovb = affine_fn_cst (integer_zero_node);
2281 if (xz_p)
2282 {
2283 affine_fn t0 = ova1;
2284 affine_fn t2 = ovb;
2285
2286 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2287 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2288 affine_fn_free (t0);
2289 affine_fn_free (t2);
2290 *last_conflicts = last_conflicts_xz;
2291 }
2292 if (yz_p)
2293 {
2294 affine_fn t0 = ova2;
2295 affine_fn t2 = ovb;
2296
2297 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2298 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2299 affine_fn_free (t0);
2300 affine_fn_free (t2);
2301 *last_conflicts = last_conflicts_yz;
2302 }
2303 if (xyz_p)
2304 {
2305 affine_fn t0 = ova1;
2306 affine_fn t2 = ova2;
2307 affine_fn t4 = ovb;
2308
2309 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2310 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2311 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2312 affine_fn_free (t0);
2313 affine_fn_free (t2);
2314 affine_fn_free (t4);
2315 *last_conflicts = last_conflicts_xyz;
2316 }
2317 *overlaps_a = conflict_fn (2, ova1, ova2);
2318 *overlaps_b = conflict_fn (1, ovb);
2319 }
2320 else
2321 {
2322 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2323 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2324 *last_conflicts = integer_zero_node;
2325 }
2326
2327 affine_fn_free (overlaps_a_xz);
2328 affine_fn_free (overlaps_b_xz);
2329 affine_fn_free (overlaps_a_yz);
2330 affine_fn_free (overlaps_b_yz);
2331 affine_fn_free (overlaps_a_xyz);
2332 affine_fn_free (overlaps_b_xyz);
2333 }
2334
2335 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2336
2337 static void
2338 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2339 int size)
2340 {
2341 memcpy (vec2, vec1, size * sizeof (*vec1));
2342 }
2343
2344 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2345
2346 static void
2347 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2348 int m, int n)
2349 {
2350 int i;
2351
2352 for (i = 0; i < m; i++)
2353 lambda_vector_copy (mat1[i], mat2[i], n);
2354 }
2355
2356 /* Store the SIZE x SIZE identity matrix in MAT.  */
2357
2358 static void
2359 lambda_matrix_id (lambda_matrix mat, int size)
2360 {
2361 int i, j;
2362
2363 for (i = 0; i < size; i++)
2364 for (j = 0; j < size; j++)
2365 mat[i][j] = (i == j) ? 1 : 0;
2366 }
2367
2368 /* Return the index of the first nonzero element of vector VEC1 between
2369    START and N.  We must have START <= N.  Returns N if VEC1 is the zero vector.  */
2370
2371 static int
2372 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2373 {
2374 int j = start;
2375 while (j < n && vec1[j] == 0)
2376 j++;
2377 return j;
2378 }
2379
2380 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2381 R2 = R2 + CONST1 * R1. */
2382
2383 static void
2384 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2385 {
2386 int i;
2387
2388 if (const1 == 0)
2389 return;
2390
2391 for (i = 0; i < n; i++)
2392 mat[r2][i] += const1 * mat[r1][i];
2393 }
2394
2395 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2396 and store the result in VEC2. */
2397
2398 static void
2399 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2400 int size, int const1)
2401 {
2402 int i;
2403
2404 if (const1 == 0)
2405 lambda_vector_clear (vec2, size);
2406 else
2407 for (i = 0; i < size; i++)
2408 vec2[i] = const1 * vec1[i];
2409 }
2410
2411 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2412
2413 static void
2414 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2415 int size)
2416 {
2417 lambda_vector_mult_const (vec1, vec2, size, -1);
2418 }
2419
2420 /* Negate row R1 of matrix MAT which has N columns. */
2421
2422 static void
2423 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2424 {
2425 lambda_vector_negate (mat[r1], mat[r1], n);
2426 }
2427
2428 /* Return true if two vectors are equal. */
2429
2430 static bool
2431 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2432 {
2433 int i;
2434 for (i = 0; i < size; i++)
2435 if (vec1[i] != vec2[i])
2436 return false;
2437 return true;
2438 }
2439
2440 /* Given an M x N integer matrix A, this function determines an M x
2441 M unimodular matrix U, and an M x N echelon matrix S such that
2442 "U.A = S". This decomposition is also known as "right Hermite".
2443
2444 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2445 Restructuring Compilers" Utpal Banerjee. */
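/* For example, for the 2 x 1 matrix A with rows (6) and (-4), one valid
   decomposition is U = ((1, 1), (2, 3)), which is unimodular (det U = 1),
   and S with rows (2) and (0): indeed U.A has rows (1*6 + 1*-4) = (2)
   and (2*6 + 3*-4) = (0), and 2 = gcd (6, 4).  This is just one valid
   (U, S) pair; the elimination below may produce a different one.  */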
2446
2447 static void
2448 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2449 lambda_matrix S, lambda_matrix U)
2450 {
2451 int i, j, i0 = 0;
2452
2453 lambda_matrix_copy (A, S, m, n);
2454 lambda_matrix_id (U, m);
2455
2456 for (j = 0; j < n; j++)
2457 {
2458 if (lambda_vector_first_nz (S[j], m, i0) < m)
2459 {
2460 ++i0;
2461 for (i = m - 1; i >= i0; i--)
2462 {
2463 while (S[i][j] != 0)
2464 {
2465 int sigma, factor, a, b;
2466
2467 a = S[i-1][j];
2468 b = S[i][j];
2469 sigma = (a * b < 0) ? -1: 1;
2470 a = abs (a);
2471 b = abs (b);
2472 factor = sigma * (a / b);
2473
2474 lambda_matrix_row_add (S, n, i, i-1, -factor);
2475 std::swap (S[i], S[i-1]);
2476
2477 lambda_matrix_row_add (U, m, i, i-1, -factor);
2478 std::swap (U[i], U[i-1]);
2479 }
2480 }
2481 }
2482 }
2483 }
2484
2485 /* Determines the overlapping elements due to accesses CHREC_A and
2486    CHREC_B, which are affine functions.  This function cannot handle
2487    symbolic evolution functions, i.e. when the initial conditions are
2488 parameters, because it uses lambda matrices of integers. */
2489
2490 static void
2491 analyze_subscript_affine_affine (tree chrec_a,
2492 tree chrec_b,
2493 conflict_function **overlaps_a,
2494 conflict_function **overlaps_b,
2495 tree *last_conflicts)
2496 {
2497 unsigned nb_vars_a, nb_vars_b, dim;
2498 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2499 lambda_matrix A, U, S;
2500 struct obstack scratch_obstack;
2501
2502 if (eq_evolutions_p (chrec_a, chrec_b))
2503 {
2504 /* The accessed index overlaps for each iteration in the
2505 loop. */
2506 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2507 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2508 *last_conflicts = chrec_dont_know;
2509 return;
2510 }
2511 if (dump_file && (dump_flags & TDF_DETAILS))
2512 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2513
2514 /* To determine the initial intersection, we have to solve a
2515    Diophantine equation.  This is the most time-consuming part.
2516 
2517    To answer the question "Is there a dependence?" we have
2518 to prove that there exists a solution to the Diophantine
2519 equation, and that the solution is in the iteration domain,
2520 i.e. the solution is positive or zero, and that the solution
2521 happens before the upper bound loop.nb_iterations. Otherwise
2522 there is no dependence. This function outputs a description of
2523 the iterations that hold the intersections. */
2524
2525 nb_vars_a = nb_vars_in_chrec (chrec_a);
2526 nb_vars_b = nb_vars_in_chrec (chrec_b);
2527
2528 gcc_obstack_init (&scratch_obstack);
2529
2530 dim = nb_vars_a + nb_vars_b;
2531 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2532 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2533 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2534
2535 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2536 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2537 gamma = init_b - init_a;
2538
2539 /* Don't do all the hard work of solving the Diophantine equation
2540 when we already know the solution: for example,
2541 | {3, +, 1}_1
2542 | {3, +, 4}_2
2543 | gamma = 3 - 3 = 0.
2544 Then the first overlap occurs during the first iterations:
2545 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2546 */
2547 if (gamma == 0)
2548 {
2549 if (nb_vars_a == 1 && nb_vars_b == 1)
2550 {
2551 HOST_WIDE_INT step_a, step_b;
2552 HOST_WIDE_INT niter, niter_a, niter_b;
2553 affine_fn ova, ovb;
2554
2555 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2556 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2557 niter = MIN (niter_a, niter_b);
2558 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2559 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2560
2561 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2562 &ova, &ovb,
2563 last_conflicts, 1);
2564 *overlaps_a = conflict_fn (1, ova);
2565 *overlaps_b = conflict_fn (1, ovb);
2566 }
2567
2568 else if (nb_vars_a == 2 && nb_vars_b == 1)
2569 compute_overlap_steps_for_affine_1_2
2570 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2571
2572 else if (nb_vars_a == 1 && nb_vars_b == 2)
2573 compute_overlap_steps_for_affine_1_2
2574 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2575
2576 else
2577 {
2578 if (dump_file && (dump_flags & TDF_DETAILS))
2579 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2580 *overlaps_a = conflict_fn_not_known ();
2581 *overlaps_b = conflict_fn_not_known ();
2582 *last_conflicts = chrec_dont_know;
2583 }
2584 goto end_analyze_subs_aa;
2585 }
2586
2587 /* U.A = S */
2588 lambda_matrix_right_hermite (A, dim, 1, S, U);
2589
2590 if (S[0][0] < 0)
2591 {
2592 S[0][0] *= -1;
2593 lambda_matrix_row_negate (U, dim, 0);
2594 }
2595 gcd_alpha_beta = S[0][0];
2596
2597 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2598    but that is quite a strange case.  Instead of ICEing, answer
2599    "don't know".  */
2600 if (gcd_alpha_beta == 0)
2601 {
2602 *overlaps_a = conflict_fn_not_known ();
2603 *overlaps_b = conflict_fn_not_known ();
2604 *last_conflicts = chrec_dont_know;
2605 goto end_analyze_subs_aa;
2606 }
2607
2608 /* The classic "gcd-test". */
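/* For example, for {2, +, 4}_1 vs. {5, +, 6}_1 we have gamma = 3 and
   gcd_alpha_beta = gcd (4, 6) = 2; since 2 does not divide 3, the equation
   2 + 4 * x = 5 + 6 * y has no integer solution, hence no dependence.  */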
2609 if (!int_divides_p (gcd_alpha_beta, gamma))
2610 {
2611 /* The "gcd-test" has determined that there is no integer
2612 solution, i.e. there is no dependence. */
2613 *overlaps_a = conflict_fn_no_dependence ();
2614 *overlaps_b = conflict_fn_no_dependence ();
2615 *last_conflicts = integer_zero_node;
2616 }
2617
2618 /* Both access functions are univariate. This includes SIV and MIV cases. */
2619 else if (nb_vars_a == 1 && nb_vars_b == 1)
2620 {
2621 /* Both functions should have the same evolution sign. */
2622 if (((A[0][0] > 0 && -A[1][0] > 0)
2623 || (A[0][0] < 0 && -A[1][0] < 0)))
2624 {
2625 /* The solutions are given by:
2626 |
2627 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2628 | [u21 u22] [y0]
2629
2630 For a given integer t. Using the following variables,
2631
2632 | i0 = u11 * gamma / gcd_alpha_beta
2633 | j0 = u12 * gamma / gcd_alpha_beta
2634 | i1 = u21
2635 | j1 = u22
2636
2637 the solutions are:
2638
2639 | x0 = i0 + i1 * t,
2640 | y0 = j0 + j1 * t. */
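/* For instance, for {0, +, 2}_1 vs. {2, +, 4}_1 (gamma = 2,
   gcd_alpha_beta = 2) the solutions of 2 * x0 = 2 + 4 * y0 are
   x0 = 1 + 2 * t and y0 = t, so the smallest conflict in the iteration
   domain is at (x, y) = (1, 0): {0, +, 2}_1 (1) = 2 = {2, +, 4}_1 (0).  */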
2641 HOST_WIDE_INT i0, j0, i1, j1;
2642
2643 i0 = U[0][0] * gamma / gcd_alpha_beta;
2644 j0 = U[0][1] * gamma / gcd_alpha_beta;
2645 i1 = U[1][0];
2646 j1 = U[1][1];
2647
2648 if ((i1 == 0 && i0 < 0)
2649 || (j1 == 0 && j0 < 0))
2650 {
2651 /* There is no solution.
2652 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2653 falls in here, but for the moment we don't look at the
2654 upper bound of the iteration domain. */
2655 *overlaps_a = conflict_fn_no_dependence ();
2656 *overlaps_b = conflict_fn_no_dependence ();
2657 *last_conflicts = integer_zero_node;
2658 goto end_analyze_subs_aa;
2659 }
2660
2661 if (i1 > 0 && j1 > 0)
2662 {
2663 HOST_WIDE_INT niter_a
2664 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2665 HOST_WIDE_INT niter_b
2666 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2667 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2668
2669 /* (X0, Y0) is a solution of the Diophantine equation:
2670 "chrec_a (X0) = chrec_b (Y0)". */
2671 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2672 CEIL (-j0, j1));
2673 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2674 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2675
2676 /* (X1, Y1) is the smallest positive solution of the eq
2677 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2678 first conflict occurs. */
2679 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2680 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2681 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2682
2683 if (niter > 0)
2684 {
2685 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2686 FLOOR_DIV (niter - j0, j1));
2687 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2688
2689 /* If the overlap occurs outside of the bounds of the
2690 loop, there is no dependence. */
2691 if (x1 >= niter || y1 >= niter)
2692 {
2693 *overlaps_a = conflict_fn_no_dependence ();
2694 *overlaps_b = conflict_fn_no_dependence ();
2695 *last_conflicts = integer_zero_node;
2696 goto end_analyze_subs_aa;
2697 }
2698 else
2699 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2700 }
2701 else
2702 *last_conflicts = chrec_dont_know;
2703
2704 *overlaps_a
2705 = conflict_fn (1,
2706 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2707 1,
2708 build_int_cst (NULL_TREE, i1)));
2709 *overlaps_b
2710 = conflict_fn (1,
2711 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2712 1,
2713 build_int_cst (NULL_TREE, j1)));
2714 }
2715 else
2716 {
2717 /* FIXME: For the moment, the upper bound of the
2718 iteration domain for i and j is not checked. */
2719 if (dump_file && (dump_flags & TDF_DETAILS))
2720 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2721 *overlaps_a = conflict_fn_not_known ();
2722 *overlaps_b = conflict_fn_not_known ();
2723 *last_conflicts = chrec_dont_know;
2724 }
2725 }
2726 else
2727 {
2728 if (dump_file && (dump_flags & TDF_DETAILS))
2729 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2730 *overlaps_a = conflict_fn_not_known ();
2731 *overlaps_b = conflict_fn_not_known ();
2732 *last_conflicts = chrec_dont_know;
2733 }
2734 }
2735 else
2736 {
2737 if (dump_file && (dump_flags & TDF_DETAILS))
2738 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2739 *overlaps_a = conflict_fn_not_known ();
2740 *overlaps_b = conflict_fn_not_known ();
2741 *last_conflicts = chrec_dont_know;
2742 }
2743
2744 end_analyze_subs_aa:
2745 obstack_free (&scratch_obstack, NULL);
2746 if (dump_file && (dump_flags & TDF_DETAILS))
2747 {
2748 fprintf (dump_file, " (overlaps_a = ");
2749 dump_conflict_function (dump_file, *overlaps_a);
2750 fprintf (dump_file, ")\n (overlaps_b = ");
2751 dump_conflict_function (dump_file, *overlaps_b);
2752 fprintf (dump_file, "))\n");
2753 }
2754 }
2755
2756 /* Returns true when analyze_subscript_affine_affine can be used for
2757 determining the dependence relation between chrec_a and chrec_b,
2758    which contain symbols.  This function modifies chrec_a and chrec_b
2759    such that the analysis result is the same, and such that they no
2760    longer contain symbols, so they can then safely be passed to the analyzer.
2761
2762 Example: The analysis of the following tuples of evolutions produce
2763 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2764 vs. {0, +, 1}_1
2765
2766 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2767 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2768 */
2769
2770 static bool
2771 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2772 {
2773 tree diff, type, left_a, left_b, right_b;
2774
2775 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2776 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2777 /* FIXME: For the moment not handled. Might be refined later. */
2778 return false;
2779
2780 type = chrec_type (*chrec_a);
2781 left_a = CHREC_LEFT (*chrec_a);
2782 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2783 diff = chrec_fold_minus (type, left_a, left_b);
2784
2785 if (!evolution_function_is_constant_p (diff))
2786 return false;
2787
2788 if (dump_file && (dump_flags & TDF_DETAILS))
2789 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2790
2791 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2792 diff, CHREC_RIGHT (*chrec_a));
2793 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2794 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2795 build_int_cst (type, 0),
2796 right_b);
2797 return true;
2798 }
2799
2800 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2801 *OVERLAPS_B are initialized to the functions that describe the
2802 relation between the elements accessed twice by CHREC_A and
2803 CHREC_B. For k >= 0, the following property is verified:
2804
2805 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2806
2807 static void
2808 analyze_siv_subscript (tree chrec_a,
2809 tree chrec_b,
2810 conflict_function **overlaps_a,
2811 conflict_function **overlaps_b,
2812 tree *last_conflicts,
2813 int loop_nest_num)
2814 {
2815 dependence_stats.num_siv++;
2816
2817 if (dump_file && (dump_flags & TDF_DETAILS))
2818 fprintf (dump_file, "(analyze_siv_subscript \n");
2819
2820 if (evolution_function_is_constant_p (chrec_a)
2821 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2822 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2823 overlaps_a, overlaps_b, last_conflicts);
2824
2825 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2826 && evolution_function_is_constant_p (chrec_b))
2827 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2828 overlaps_b, overlaps_a, last_conflicts);
2829
2830 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2831 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2832 {
2833 if (!chrec_contains_symbols (chrec_a)
2834 && !chrec_contains_symbols (chrec_b))
2835 {
2836 analyze_subscript_affine_affine (chrec_a, chrec_b,
2837 overlaps_a, overlaps_b,
2838 last_conflicts);
2839
2840 if (CF_NOT_KNOWN_P (*overlaps_a)
2841 || CF_NOT_KNOWN_P (*overlaps_b))
2842 dependence_stats.num_siv_unimplemented++;
2843 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2844 || CF_NO_DEPENDENCE_P (*overlaps_b))
2845 dependence_stats.num_siv_independent++;
2846 else
2847 dependence_stats.num_siv_dependent++;
2848 }
2849 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2850 &chrec_b))
2851 {
2852 analyze_subscript_affine_affine (chrec_a, chrec_b,
2853 overlaps_a, overlaps_b,
2854 last_conflicts);
2855
2856 if (CF_NOT_KNOWN_P (*overlaps_a)
2857 || CF_NOT_KNOWN_P (*overlaps_b))
2858 dependence_stats.num_siv_unimplemented++;
2859 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2860 || CF_NO_DEPENDENCE_P (*overlaps_b))
2861 dependence_stats.num_siv_independent++;
2862 else
2863 dependence_stats.num_siv_dependent++;
2864 }
2865 else
2866 goto siv_subscript_dontknow;
2867 }
2868
2869 else
2870 {
2871 siv_subscript_dontknow:;
2872 if (dump_file && (dump_flags & TDF_DETAILS))
2873 fprintf (dump_file, " siv test failed: unimplemented");
2874 *overlaps_a = conflict_fn_not_known ();
2875 *overlaps_b = conflict_fn_not_known ();
2876 *last_conflicts = chrec_dont_know;
2877 dependence_stats.num_siv_unimplemented++;
2878 }
2879
2880 if (dump_file && (dump_flags & TDF_DETAILS))
2881 fprintf (dump_file, ")\n");
2882 }
2883
2884 /* Returns false if we can prove that the greatest common divisor of the steps
2885    of CHREC does not divide CST, and true otherwise.  */
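/* For example, for CHREC = {{0, +, 4}_1, +, 6}_2 and CST = 3, the GCD of
   the steps is gcd (4, 6) = 2, and since 3 % 2 != 0 the function returns
   false; for CST = 8 it would return true.  */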
2886
2887 static bool
2888 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2889 {
2890 HOST_WIDE_INT cd = 0, val;
2891 tree step;
2892
2893 if (!tree_fits_shwi_p (cst))
2894 return true;
2895 val = tree_to_shwi (cst);
2896
2897 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2898 {
2899 step = CHREC_RIGHT (chrec);
2900 if (!tree_fits_shwi_p (step))
2901 return true;
2902 cd = gcd (cd, tree_to_shwi (step));
2903 chrec = CHREC_LEFT (chrec);
2904 }
2905
2906 return val % cd == 0;
2907 }
2908
2909 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2910 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2911 functions that describe the relation between the elements accessed
2912 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2913 is verified:
2914
2915 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2916
2917 static void
2918 analyze_miv_subscript (tree chrec_a,
2919 tree chrec_b,
2920 conflict_function **overlaps_a,
2921 conflict_function **overlaps_b,
2922 tree *last_conflicts,
2923 struct loop *loop_nest)
2924 {
2925 tree type, difference;
2926
2927 dependence_stats.num_miv++;
2928 if (dump_file && (dump_flags & TDF_DETAILS))
2929 fprintf (dump_file, "(analyze_miv_subscript \n");
2930
2931 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2932 chrec_a = chrec_convert (type, chrec_a, NULL);
2933 chrec_b = chrec_convert (type, chrec_b, NULL);
2934 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2935
2936 if (eq_evolutions_p (chrec_a, chrec_b))
2937 {
2938 /* Access functions are the same: all the elements are accessed
2939 in the same order. */
2940 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2941 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2942 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2943 dependence_stats.num_miv_dependent++;
2944 }
2945
2946 else if (evolution_function_is_constant_p (difference)
2947 /* For the moment, the following is verified:
2948 evolution_function_is_affine_multivariate_p (chrec_a,
2949 loop_nest->num) */
2950 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2951 {
2952 /* testsuite/.../ssa-chrec-33.c
2953 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2954
2955 The difference is 1, and all the evolution steps are multiples
2956 of 2, consequently there are no overlapping elements. */
2957 *overlaps_a = conflict_fn_no_dependence ();
2958 *overlaps_b = conflict_fn_no_dependence ();
2959 *last_conflicts = integer_zero_node;
2960 dependence_stats.num_miv_independent++;
2961 }
2962
2963 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2964 && !chrec_contains_symbols (chrec_a)
2965 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2966 && !chrec_contains_symbols (chrec_b))
2967 {
2968 /* testsuite/.../ssa-chrec-35.c
2969 {0, +, 1}_2 vs. {0, +, 1}_3
2970 the overlapping elements are respectively located at iterations:
2971 {0, +, 1}_x and {0, +, 1}_x,
2972 in other words, we have the equality:
2973 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2974
2975 Other examples:
2976 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2977 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2978
2979 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2980 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2981 */
2982 analyze_subscript_affine_affine (chrec_a, chrec_b,
2983 overlaps_a, overlaps_b, last_conflicts);
2984
2985 if (CF_NOT_KNOWN_P (*overlaps_a)
2986 || CF_NOT_KNOWN_P (*overlaps_b))
2987 dependence_stats.num_miv_unimplemented++;
2988 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2989 || CF_NO_DEPENDENCE_P (*overlaps_b))
2990 dependence_stats.num_miv_independent++;
2991 else
2992 dependence_stats.num_miv_dependent++;
2993 }
2994
2995 else
2996 {
2997 /* When the analysis is too difficult, answer "don't know". */
2998 if (dump_file && (dump_flags & TDF_DETAILS))
2999 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
3000
3001 *overlaps_a = conflict_fn_not_known ();
3002 *overlaps_b = conflict_fn_not_known ();
3003 *last_conflicts = chrec_dont_know;
3004 dependence_stats.num_miv_unimplemented++;
3005 }
3006
3007 if (dump_file && (dump_flags & TDF_DETAILS))
3008 fprintf (dump_file, ")\n");
3009 }
3010
3011 /* Determines the iterations for which CHREC_A is equal to CHREC_B
3012    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
3013 OVERLAP_ITERATIONS_B are initialized with two functions that
3014 describe the iterations that contain conflicting elements.
3015
3016 Remark: For an integer k >= 0, the following equality is true:
3017
3018 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3019 */
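/* For example, for CHREC_A = {0, +, 2}_1 and CHREC_B = {0, +, 4}_1 the
   conflicting iterations are OVERLAP_ITERATIONS_A (k) = 2 * k and
   OVERLAP_ITERATIONS_B (k) = k, since {0, +, 2}_1 (2 * k) = 4 * k
   = {0, +, 4}_1 (k).  */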
3020
3021 static void
3022 analyze_overlapping_iterations (tree chrec_a,
3023 tree chrec_b,
3024 conflict_function **overlap_iterations_a,
3025 conflict_function **overlap_iterations_b,
3026 tree *last_conflicts, struct loop *loop_nest)
3027 {
3028 unsigned int lnn = loop_nest->num;
3029
3030 dependence_stats.num_subscript_tests++;
3031
3032 if (dump_file && (dump_flags & TDF_DETAILS))
3033 {
3034 fprintf (dump_file, "(analyze_overlapping_iterations \n");
3035 fprintf (dump_file, " (chrec_a = ");
3036 print_generic_expr (dump_file, chrec_a, 0);
3037 fprintf (dump_file, ")\n (chrec_b = ");
3038 print_generic_expr (dump_file, chrec_b, 0);
3039 fprintf (dump_file, ")\n");
3040 }
3041
3042 if (chrec_a == NULL_TREE
3043 || chrec_b == NULL_TREE
3044 || chrec_contains_undetermined (chrec_a)
3045 || chrec_contains_undetermined (chrec_b))
3046 {
3047 dependence_stats.num_subscript_undetermined++;
3048
3049 *overlap_iterations_a = conflict_fn_not_known ();
3050 *overlap_iterations_b = conflict_fn_not_known ();
3051 }
3052
3053 /* If they are the same chrec, and are affine, they overlap
3054 on every iteration. */
3055 else if (eq_evolutions_p (chrec_a, chrec_b)
3056 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3057 || operand_equal_p (chrec_a, chrec_b, 0)))
3058 {
3059 dependence_stats.num_same_subscript_function++;
3060 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3061 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3062 *last_conflicts = chrec_dont_know;
3063 }
3064
3065 /* If they aren't the same, and aren't affine, we can't do anything
3066 yet. */
3067 else if ((chrec_contains_symbols (chrec_a)
3068 || chrec_contains_symbols (chrec_b))
3069 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3070 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
3071 {
3072 dependence_stats.num_subscript_undetermined++;
3073 *overlap_iterations_a = conflict_fn_not_known ();
3074 *overlap_iterations_b = conflict_fn_not_known ();
3075 }
3076
3077 else if (ziv_subscript_p (chrec_a, chrec_b))
3078 analyze_ziv_subscript (chrec_a, chrec_b,
3079 overlap_iterations_a, overlap_iterations_b,
3080 last_conflicts);
3081
3082 else if (siv_subscript_p (chrec_a, chrec_b))
3083 analyze_siv_subscript (chrec_a, chrec_b,
3084 overlap_iterations_a, overlap_iterations_b,
3085 last_conflicts, lnn);
3086
3087 else
3088 analyze_miv_subscript (chrec_a, chrec_b,
3089 overlap_iterations_a, overlap_iterations_b,
3090 last_conflicts, loop_nest);
3091
3092 if (dump_file && (dump_flags & TDF_DETAILS))
3093 {
3094 fprintf (dump_file, " (overlap_iterations_a = ");
3095 dump_conflict_function (dump_file, *overlap_iterations_a);
3096 fprintf (dump_file, ")\n (overlap_iterations_b = ");
3097 dump_conflict_function (dump_file, *overlap_iterations_b);
3098 fprintf (dump_file, "))\n");
3099 }
3100 }
3101
3102 /* Helper function for uniquely inserting distance vectors. */
3103
3104 static void
3105 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3106 {
3107 unsigned i;
3108 lambda_vector v;
3109
3110 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3111 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3112 return;
3113
3114 DDR_DIST_VECTS (ddr).safe_push (dist_v);
3115 }
3116
3117 /* Helper function for uniquely inserting direction vectors. */
3118
3119 static void
3120 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3121 {
3122 unsigned i;
3123 lambda_vector v;
3124
3125 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3126 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3127 return;
3128
3129 DDR_DIR_VECTS (ddr).safe_push (dir_v);
3130 }
3131
3132 /* Add a distance of 1 to all the loops outer than INDEX.  If we
3133 haven't yet determined a distance for this outer loop, push a new
3134 distance vector composed of the previous distance, and a distance
3135 of 1 for this outer loop. Example:
3136
3137 | loop_1
3138 | loop_2
3139 | A[10]
3140 | endloop_2
3141 | endloop_1
3142
3143 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3144 save (0, 1), then we have to save (1, 0). */
3145
3146 static void
3147 add_outer_distances (struct data_dependence_relation *ddr,
3148 lambda_vector dist_v, int index)
3149 {
3150 /* For each outer loop where init_v is not set, the accesses are
3151 in dependence of distance 1 in the loop. */
3152 while (--index >= 0)
3153 {
3154 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3155 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3156 save_v[index] = 1;
3157 save_dist_v (ddr, save_v);
3158 }
3159 }
3160
3161 /* Return false when we fail to represent the data dependence as a
3162 distance vector. INIT_B is set to true when a component has been
3163 added to the distance vector DIST_V. INDEX_CARRY is then set to
3164 the index in DIST_V that carries the dependence. */
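/* For example, for the two references in "A[i + 2] = A[i]" within a single
   loop, the access functions are {2, +, 1}_1 and {0, +, 1}_1, the subscript
   distance is the constant 2, and the resulting distance vector is (2).  */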
3165
3166 static bool
3167 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3168 struct data_reference *ddr_a,
3169 struct data_reference *ddr_b,
3170 lambda_vector dist_v, bool *init_b,
3171 int *index_carry)
3172 {
3173 unsigned i;
3174 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3175
3176 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3177 {
3178 tree access_fn_a, access_fn_b;
3179 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3180
3181 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3182 {
3183 non_affine_dependence_relation (ddr);
3184 return false;
3185 }
3186
3187 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3188 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3189
3190 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3191 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3192 {
3193 int dist, index;
3194 int var_a = CHREC_VARIABLE (access_fn_a);
3195 int var_b = CHREC_VARIABLE (access_fn_b);
3196
3197 if (var_a != var_b
3198 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3199 {
3200 non_affine_dependence_relation (ddr);
3201 return false;
3202 }
3203
3204 dist = int_cst_value (SUB_DISTANCE (subscript));
3205 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3206 *index_carry = MIN (index, *index_carry);
3207
3208 /* This is the subscript coupling test. If we have already
3209 recorded a distance for this loop (a distance coming from
3210 another subscript), it should be the same. For example,
3211 in the following code, there is no dependence:
3212
3213 | loop i = 0, N, 1
3214 | T[i+1][i] = ...
3215 | ... = T[i][i]
3216 | endloop
3217 */
3218 if (init_v[index] != 0 && dist_v[index] != dist)
3219 {
3220 finalize_ddr_dependent (ddr, chrec_known);
3221 return false;
3222 }
3223
3224 dist_v[index] = dist;
3225 init_v[index] = 1;
3226 *init_b = true;
3227 }
3228 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3229 {
3230 /* This can be for example an affine vs. constant dependence
3231 (T[i] vs. T[3]) that is not an affine dependence and is
3232 not representable as a distance vector. */
3233 non_affine_dependence_relation (ddr);
3234 return false;
3235 }
3236 }
3237
3238 return true;
3239 }
3240
3241 /* Return true when the DDR contains only constant access functions. */
3242
3243 static bool
3244 constant_access_functions (const struct data_dependence_relation *ddr)
3245 {
3246 unsigned i;
3247
3248 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3249 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3250 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3251 return false;
3252
3253 return true;
3254 }
3255
3256 /* Helper function for the case where DDR_A and DDR_B are the same
3257 multivariate access function with a constant step. For an example
3258 see pr34635-1.c. */
3259
3260 static void
3261 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3262 {
3263 int x_1, x_2;
3264 tree c_1 = CHREC_LEFT (c_2);
3265 tree c_0 = CHREC_LEFT (c_1);
3266 lambda_vector dist_v;
3267 int v1, v2, cd;
3268
3269 /* Polynomials with more than 2 variables are not handled yet. When
3270 the evolution steps are parameters, it is not possible to
3271 represent the dependence using classical distance vectors. */
3272 if (TREE_CODE (c_0) != INTEGER_CST
3273 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3274 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3275 {
3276 DDR_AFFINE_P (ddr) = false;
3277 return;
3278 }
3279
3280 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3281 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3282
3283 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
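/* That is, the access function 2 * i_1 + 3 * i_2 reaches the same element
   again after 3 more iterations of loop_1 combined with 2 fewer iterations
   of loop_2, since 2 * 3 + 3 * (-2) = 0.  */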
3284 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3285 v1 = int_cst_value (CHREC_RIGHT (c_1));
3286 v2 = int_cst_value (CHREC_RIGHT (c_2));
3287 cd = gcd (v1, v2);
3288 v1 /= cd;
3289 v2 /= cd;
3290
3291 if (v2 < 0)
3292 {
3293 v2 = -v2;
3294 v1 = -v1;
3295 }
3296
3297 dist_v[x_1] = v2;
3298 dist_v[x_2] = -v1;
3299 save_dist_v (ddr, dist_v);
3300
3301 add_outer_distances (ddr, dist_v, x_1);
3302 }
3303
3304 /* Helper function for the case where DDR_A and DDR_B are the same
3305 access functions. */
3306
3307 static void
3308 add_other_self_distances (struct data_dependence_relation *ddr)
3309 {
3310 lambda_vector dist_v;
3311 unsigned i;
3312 int index_carry = DDR_NB_LOOPS (ddr);
3313
3314 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3315 {
3316 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3317
3318 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3319 {
3320 if (!evolution_function_is_univariate_p (access_fun))
3321 {
3322 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3323 {
3324 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3325 return;
3326 }
3327
3328 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3329
3330 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3331 add_multivariate_self_dist (ddr, access_fun);
3332 else
3333 /* The evolution step is not constant: it varies in
3334 the outer loop, so this cannot be represented by a
3335 distance vector. For example in pr34635.c the
3336 evolution is {0, +, {0, +, 4}_1}_2. */
3337 DDR_AFFINE_P (ddr) = false;
3338
3339 return;
3340 }
3341
3342 index_carry = MIN (index_carry,
3343 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3344 DDR_LOOP_NEST (ddr)));
3345 }
3346 }
3347
3348 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3349 add_outer_distances (ddr, dist_v, index_carry);
3350 }
3351
3352 static void
3353 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3354 {
3355 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3356
3357 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3358 save_dist_v (ddr, dist_v);
3359 }
3360
3361 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3362 is the case for example when access functions are the same and
3363 equal to a constant, as in:
3364
3365 | loop_1
3366 | A[3] = ...
3367 | ... = A[3]
3368 | endloop_1
3369
3370 in which case the distance vectors are (0) and (1). */
3371
3372 static void
3373 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3374 {
3375 unsigned i, j;
3376
3377 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3378 {
3379 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3380 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3381 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3382
3383 for (j = 0; j < ca->n; j++)
3384 if (affine_function_zero_p (ca->fns[j]))
3385 {
3386 insert_innermost_unit_dist_vector (ddr);
3387 return;
3388 }
3389
3390 for (j = 0; j < cb->n; j++)
3391 if (affine_function_zero_p (cb->fns[j]))
3392 {
3393 insert_innermost_unit_dist_vector (ddr);
3394 return;
3395 }
3396 }
3397 }
3398
3399 /* Compute the classic per loop distance vector. DDR is the data
3400 dependence relation to build a vector from.  Return false when we fail
3401 to represent the data dependence as a distance vector. */
3402
3403 static bool
3404 build_classic_dist_vector (struct data_dependence_relation *ddr,
3405 struct loop *loop_nest)
3406 {
3407 bool init_b = false;
3408 int index_carry = DDR_NB_LOOPS (ddr);
3409 lambda_vector dist_v;
3410
3411 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3412 return false;
3413
3414 if (same_access_functions (ddr))
3415 {
3416 /* Save the 0 vector. */
3417 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3418 save_dist_v (ddr, dist_v);
3419
3420 if (constant_access_functions (ddr))
3421 add_distance_for_zero_overlaps (ddr);
3422
3423 if (DDR_NB_LOOPS (ddr) > 1)
3424 add_other_self_distances (ddr);
3425
3426 return true;
3427 }
3428
3429 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3430 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3431 dist_v, &init_b, &index_carry))
3432 return false;
3433
3434 /* Save the distance vector if we initialized one. */
3435 if (init_b)
3436 {
3437 /* Verify a basic constraint: classic distance vectors should
3438 always be lexicographically positive.
3439
3440 Data references are collected in the order of execution of
3441 the program, thus for the following loop
3442
3443 | for (i = 1; i < 100; i++)
3444 | for (j = 1; j < 100; j++)
3445 | {
3446 | t = T[j+1][i-1]; // A
3447 | T[j][i] = t + 2; // B
3448 | }
3449
3450 references are collected following the direction of the wind:
3451 A then B. The data dependence tests are performed also
3452 following this order, such that we're looking at the distance
3453 separating the elements accessed by A from the elements later
3454 accessed by B. But in this example, the distance returned by
3455 test_dep (A, B) is lexicographically negative (-1, 1), which
3456 means that the access A occurs later than B with respect to
3457 the outer loop, i.e. we're actually looking upwind.  In this
3458 case we solve test_dep (B, A) looking downwind to the
3459 lexicographically positive solution, that returns the
3460 distance vector (1, -1). */
3461 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3462 {
3463 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3464 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3465 loop_nest))
3466 return false;
3467 compute_subscript_distance (ddr);
3468 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3469 save_v, &init_b, &index_carry))
3470 return false;
3471 save_dist_v (ddr, save_v);
3472 DDR_REVERSED_P (ddr) = true;
3473
3474 /* In this case there is a dependence forward for all the
3475 outer loops:
3476
3477 | for (k = 1; k < 100; k++)
3478 | for (i = 1; i < 100; i++)
3479 | for (j = 1; j < 100; j++)
3480 | {
3481 | t = T[j+1][i-1]; // A
3482 | T[j][i] = t + 2; // B
3483 | }
3484
3485 the vectors are:
3486 (0, 1, -1)
3487 (1, 1, -1)
3488 (1, -1, 1)
3489 */
3490 if (DDR_NB_LOOPS (ddr) > 1)
3491 {
3492 add_outer_distances (ddr, save_v, index_carry);
3493 add_outer_distances (ddr, dist_v, index_carry);
3494 }
3495 }
3496 else
3497 {
3498 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3499 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3500
3501 if (DDR_NB_LOOPS (ddr) > 1)
3502 {
3503 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3504
3505 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3506 DDR_A (ddr), loop_nest))
3507 return false;
3508 compute_subscript_distance (ddr);
3509 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3510 opposite_v, &init_b,
3511 &index_carry))
3512 return false;
3513
3514 save_dist_v (ddr, save_v);
3515 add_outer_distances (ddr, dist_v, index_carry);
3516 add_outer_distances (ddr, opposite_v, index_carry);
3517 }
3518 else
3519 save_dist_v (ddr, save_v);
3520 }
3521 }
3522 else
3523 {
3524 /* There is a distance of 1 on all the outer loops: Example:
3525 there is a dependence of distance 1 on loop_1 for the array A.
3526
3527 | loop_1
3528 | A[5] = ...
3529 | endloop
3530 */
3531 add_outer_distances (ddr, dist_v,
3532 lambda_vector_first_nz (dist_v,
3533 DDR_NB_LOOPS (ddr), 0));
3534 }
3535
3536 if (dump_file && (dump_flags & TDF_DETAILS))
3537 {
3538 unsigned i;
3539
3540 fprintf (dump_file, "(build_classic_dist_vector\n");
3541 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3542 {
3543 fprintf (dump_file, " dist_vector = (");
3544 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3545 DDR_NB_LOOPS (ddr));
3546 fprintf (dump_file, " )\n");
3547 }
3548 fprintf (dump_file, ")\n");
3549 }
3550
3551 return true;
3552 }
3553
3554 /* Return the direction for a given distance.
3555 FIXME: Computing dir this way is suboptimal, since dir can catch
3556 cases that dist is unable to represent. */
3557
3558 static inline enum data_dependence_direction
3559 dir_from_dist (int dist)
3560 {
3561 if (dist > 0)
3562 return dir_positive;
3563 else if (dist < 0)
3564 return dir_negative;
3565 else
3566 return dir_equal;
3567 }
3568
3569 /* Compute the classic per loop direction vector. DDR is the data
3570 dependence relation to build a vector from. */
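/* For example, a distance vector (1, -1) is translated into the direction
   vector (dir_positive, dir_negative).  */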
3571
3572 static void
3573 build_classic_dir_vector (struct data_dependence_relation *ddr)
3574 {
3575 unsigned i, j;
3576 lambda_vector dist_v;
3577
3578 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3579 {
3580 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3581
3582 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3583 dir_v[j] = dir_from_dist (dist_v[j]);
3584
3585 save_dir_v (ddr, dir_v);
3586 }
3587 }
3588
3589 /* Helper function. Returns true when there is a dependence between
3590 data references DRA and DRB. */
3591
3592 static bool
3593 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3594 struct data_reference *dra,
3595 struct data_reference *drb,
3596 struct loop *loop_nest)
3597 {
3598 unsigned int i;
3599 tree last_conflicts;
3600 struct subscript *subscript;
3601 tree res = NULL_TREE;
3602
3603 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
3604 {
3605 conflict_function *overlaps_a, *overlaps_b;
3606
3607 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3608 DR_ACCESS_FN (drb, i),
3609 &overlaps_a, &overlaps_b,
3610 &last_conflicts, loop_nest);
3611
3612 if (SUB_CONFLICTS_IN_A (subscript))
3613 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3614 if (SUB_CONFLICTS_IN_B (subscript))
3615 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3616
3617 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3618 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3619 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3620
3621 /* If there is any undetermined conflict function we have to
3622 give a conservative answer in case we cannot prove that
3623 no dependence exists when analyzing another subscript. */
3624 if (CF_NOT_KNOWN_P (overlaps_a)
3625 || CF_NOT_KNOWN_P (overlaps_b))
3626 {
3627 res = chrec_dont_know;
3628 continue;
3629 }
3630
3631 /* When there is a subscript with no dependence we can stop. */
3632 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3633 || CF_NO_DEPENDENCE_P (overlaps_b))
3634 {
3635 res = chrec_known;
3636 break;
3637 }
3638 }
3639
3640 if (res == NULL_TREE)
3641 return true;
3642
3643 if (res == chrec_known)
3644 dependence_stats.num_dependence_independent++;
3645 else
3646 dependence_stats.num_dependence_undetermined++;
3647 finalize_ddr_dependent (ddr, res);
3648 return false;
3649 }
3650
3651 /* Computes the conflicting iterations in LOOP_NEST, and initializes DDR.  */
3652
3653 static void
3654 subscript_dependence_tester (struct data_dependence_relation *ddr,
3655 struct loop *loop_nest)
3656 {
3657 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3658 dependence_stats.num_dependence_dependent++;
3659
3660 compute_subscript_distance (ddr);
3661 if (build_classic_dist_vector (ddr, loop_nest))
3662 build_classic_dir_vector (ddr);
3663 }
3664
3665 /* Returns true when all the access functions of A are affine or
3666 constant with respect to LOOP_NEST. */
3667
3668 static bool
3669 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3670 const struct loop *loop_nest)
3671 {
3672 unsigned int i;
3673 vec<tree> fns = DR_ACCESS_FNS (a);
3674 tree t;
3675
3676 FOR_EACH_VEC_ELT (fns, i, t)
3677 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3678 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3679 return false;
3680
3681 return true;
3682 }
3683
3684 /* This computes the affine dependence relation between A and B with
3685 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3686 independence between two accesses, while CHREC_DONT_KNOW is used
3687 for representing the unknown relation.
3688
3689 Note that it is possible to stop the computation of the dependence
3690 relation the first time we detect a CHREC_KNOWN element for a given
3691 subscript. */
3692
3693 void
3694 compute_affine_dependence (struct data_dependence_relation *ddr,
3695 struct loop *loop_nest)
3696 {
3697 struct data_reference *dra = DDR_A (ddr);
3698 struct data_reference *drb = DDR_B (ddr);
3699
3700 if (dump_file && (dump_flags & TDF_DETAILS))
3701 {
3702 fprintf (dump_file, "(compute_affine_dependence\n");
3703 fprintf (dump_file, " stmt_a: ");
3704 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
3705 fprintf (dump_file, " stmt_b: ");
3706 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
3707 }
3708
3709 /* Analyze only when the dependence relation is not yet known. */
3710 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3711 {
3712 dependence_stats.num_dependence_tests++;
3713
3714 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3715 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3716 subscript_dependence_tester (ddr, loop_nest);
3717
3718 /* As a last case, if the dependence cannot be determined, or if
3719 the dependence is considered too difficult to determine, answer
3720 "don't know". */
3721 else
3722 {
3723 dependence_stats.num_dependence_undetermined++;
3724
3725 if (dump_file && (dump_flags & TDF_DETAILS))
3726 {
3727 fprintf (dump_file, "Data ref a:\n");
3728 dump_data_reference (dump_file, dra);
3729 fprintf (dump_file, "Data ref b:\n");
3730 dump_data_reference (dump_file, drb);
3731 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3732 }
3733 finalize_ddr_dependent (ddr, chrec_dont_know);
3734 }
3735 }
3736
3737 if (dump_file && (dump_flags & TDF_DETAILS))
3738 {
3739 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3740 fprintf (dump_file, ") -> no dependence\n");
3741 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
3742 fprintf (dump_file, ") -> dependence analysis failed\n");
3743 else
3744 fprintf (dump_file, ")\n");
3745 }
3746 }
3747
3748 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3749 the data references in DATAREFS, in the LOOP_NEST. When
3750 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3751 relations.  Return true when successful, i.e. when the number of data
3752 references is small enough to be handled.  */
3753
3754 bool
3755 compute_all_dependences (vec<data_reference_p> datarefs,
3756 vec<ddr_p> *dependence_relations,
3757 vec<loop_p> loop_nest,
3758 bool compute_self_and_rr)
3759 {
3760 struct data_dependence_relation *ddr;
3761 struct data_reference *a, *b;
3762 unsigned int i, j;
3763
3764 if ((int) datarefs.length ()
3765 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
3766 {
3767 struct data_dependence_relation *ddr;
3768
3769 /* Insert a single relation into dependence_relations:
3770 chrec_dont_know. */
3771 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
3772 dependence_relations->safe_push (ddr);
3773 return false;
3774 }
3775
3776 FOR_EACH_VEC_ELT (datarefs, i, a)
3777 for (j = i + 1; datarefs.iterate (j, &b); j++)
3778 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
3779 {
3780 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3781 dependence_relations->safe_push (ddr);
3782 if (loop_nest.exists ())
3783 compute_affine_dependence (ddr, loop_nest[0]);
3784 }
3785
3786 if (compute_self_and_rr)
3787 FOR_EACH_VEC_ELT (datarefs, i, a)
3788 {
3789 ddr = initialize_data_dependence_relation (a, a, loop_nest);
3790 dependence_relations->safe_push (ddr);
3791 if (loop_nest.exists ())
3792 compute_affine_dependence (ddr, loop_nest[0]);
3793 }
3794
3795 return true;
3796 }
3797
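/* A minimal usage sketch for compute_all_dependences; the variable
   names below (nest, datarefs, ddrs, loop, n_independent) are
   illustrative only.  A caller typically collects the loop nest and
   the data references first, then builds the dependence relations and
   inspects DDR_ARE_DEPENDENT:

   | auto_vec<loop_p, 3> nest;
   | vec<data_reference_p> datarefs = vNULL;
   | vec<ddr_p> ddrs = vNULL;
   | unsigned i, n_independent = 0;
   | ddr_p ddr;
   |
   | if (find_loop_nest (loop, &nest)
   |     && find_data_references_in_loop (loop, &datarefs) != chrec_dont_know
   |     && compute_all_dependences (datarefs, &ddrs, nest, false))
   |   FOR_EACH_VEC_ELT (ddrs, i, ddr)
   |     if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
   |       n_independent++;
   |
   | free_dependence_relations (ddrs);
   | free_data_refs (datarefs);

   When DATAREFS contains more elements than
   PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS, compute_all_dependences gives
   up: it pushes a single chrec_dont_know relation and returns false,
   so the vectors can still be released as above.  */
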
3798 /* Describes a location of a memory reference. */
3799
3800 struct data_ref_loc
3801 {
3802 /* The memory reference. */
3803 tree ref;
3804
3805 /* True if the memory reference is read. */
3806 bool is_read;
3807 };
3808
3809
3810 /* Stores the locations of memory references in STMT to REFERENCES. Returns
3811 true if STMT clobbers memory, false otherwise. */
3812
3813 static bool
3814 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
3815 {
3816 bool clobbers_memory = false;
3817 data_ref_loc ref;
3818 tree op0, op1;
3819 enum gimple_code stmt_code = gimple_code (stmt);
3820
3821 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
3822 Since we cannot model data references to accesses that are not
3823 spelled out explicitly, give up if such accesses may occur. */
3824 if (stmt_code == GIMPLE_CALL
3825 && !(gimple_call_flags (stmt) & ECF_CONST))
3826 {
3827 /* Allow IFN_GOMP_SIMD_LANE calls in the loops they belong to. */
3828 if (gimple_call_internal_p (stmt))
3829 switch (gimple_call_internal_fn (stmt))
3830 {
3831 case IFN_GOMP_SIMD_LANE:
3832 {
3833 struct loop *loop = gimple_bb (stmt)->loop_father;
3834 tree uid = gimple_call_arg (stmt, 0);
3835 gcc_assert (TREE_CODE (uid) == SSA_NAME);
3836 if (loop == NULL
3837 || loop->simduid != SSA_NAME_VAR (uid))
3838 clobbers_memory = true;
3839 break;
3840 }
3841 case IFN_MASK_LOAD:
3842 case IFN_MASK_STORE:
3843 break;
3844 default:
3845 clobbers_memory = true;
3846 break;
3847 }
3848 else
3849 clobbers_memory = true;
3850 }
3851 else if (stmt_code == GIMPLE_ASM
3852 && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
3853 || gimple_vuse (stmt)))
3854 clobbers_memory = true;
3855
3856 if (!gimple_vuse (stmt))
3857 return clobbers_memory;
3858
3859 if (stmt_code == GIMPLE_ASSIGN)
3860 {
3861 tree base;
3862 op0 = gimple_assign_lhs (stmt);
3863 op1 = gimple_assign_rhs1 (stmt);
3864
3865 if (DECL_P (op1)
3866 || (REFERENCE_CLASS_P (op1)
3867 && (base = get_base_address (op1))
3868 && TREE_CODE (base) != SSA_NAME))
3869 {
3870 ref.ref = op1;
3871 ref.is_read = true;
3872 references->safe_push (ref);
3873 }
3874 }
3875 else if (stmt_code == GIMPLE_CALL)
3876 {
3877 unsigned i, n;
3878
3879 ref.is_read = false;
3880 if (gimple_call_internal_p (stmt))
3881 switch (gimple_call_internal_fn (stmt))
3882 {
3883 case IFN_MASK_LOAD:
3884 if (gimple_call_lhs (stmt) == NULL_TREE)
3885 break;
3886 ref.is_read = true;
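/* FALLTHRU: MASK_LOAD shares the MEM_REF construction below with MASK_STORE.  */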
3887 case IFN_MASK_STORE:
3888 ref.ref = fold_build2 (MEM_REF,
3889 ref.is_read
3890 ? TREE_TYPE (gimple_call_lhs (stmt))
3891 : TREE_TYPE (gimple_call_arg (stmt, 3)),
3892 gimple_call_arg (stmt, 0),
3893 gimple_call_arg (stmt, 1));
3894 references->safe_push (ref);
3895 return false;
3896 default:
3897 break;
3898 }
3899
3900 op0 = gimple_call_lhs (stmt);
3901 n = gimple_call_num_args (stmt);
3902 for (i = 0; i < n; i++)
3903 {
3904 op1 = gimple_call_arg (stmt, i);
3905
3906 if (DECL_P (op1)
3907 || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
3908 {
3909 ref.ref = op1;
3910 ref.is_read = true;
3911 references->safe_push (ref);
3912 }
3913 }
3914 }
3915 else
3916 return clobbers_memory;
3917
3918 if (op0
3919 && (DECL_P (op0)
3920 || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
3921 {
3922 ref.ref = op0;
3923 ref.is_read = false;
3924 references->safe_push (ref);
3925 }
3926 return clobbers_memory;
3927 }
3928
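/* To illustrate the recording scheme above: for a GIMPLE assignment
   such as

   | A[i_1] = B[i_1];

   the right-hand side B[i_1] is pushed first with is_read set to true,
   and the left-hand side A[i_1] is pushed last with is_read set to
   false.  Statements without a virtual use (purely scalar code) record
   no references at all.  */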
3929
3930 /* Returns true if the loop-nest has any data reference. */
3931
3932 bool
3933 loop_nest_has_data_refs (loop_p loop)
3934 {
3935 basic_block *bbs = get_loop_body (loop);
3936 vec<data_ref_loc> references;
3937 references.create (3);
3938
3939 for (unsigned i = 0; i < loop->num_nodes; i++)
3940 {
3941 basic_block bb = bbs[i];
3942 gimple_stmt_iterator bsi;
3943
3944 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
3945 {
3946 gimple *stmt = gsi_stmt (bsi);
3947 get_references_in_stmt (stmt, &references);
3948 if (references.length ())
3949 {
3950 free (bbs);
3951 references.release ();
3952 return true;
3953 }
3954 }
3955 }
3956 free (bbs);
3957 references.release ();
3958
3959 if (loop->inner)
3960 {
3961 loop = loop->inner;
3962 while (loop)
3963 {
3964 if (loop_nest_has_data_refs (loop))
3965 return true;
3966 loop = loop->next;
3967 }
3968 }
3969 return false;
3970 }
3971
3972 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
3973 reference, returns false, otherwise returns true. NEST is the outermost
3974 loop of the loop nest in which the references should be analyzed. */
3975
3976 bool
3977 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
3978 vec<data_reference_p> *datarefs)
3979 {
3980 unsigned i;
3981 auto_vec<data_ref_loc, 2> references;
3982 data_ref_loc *ref;
3983 bool ret = true;
3984 data_reference_p dr;
3985
3986 if (get_references_in_stmt (stmt, &references))
3987 return false;
3988
3989 FOR_EACH_VEC_ELT (references, i, ref)
3990 {
3991 dr = create_data_ref (nest, loop_containing_stmt (stmt),
3992 ref->ref, stmt, ref->is_read);
3993 gcc_assert (dr != NULL);
3994 datarefs->safe_push (dr);
3995 }
3996 references.release ();
3997 return ret;
3998 }
3999
4000 /* Stores the data references in STMT to DATAREFS. If there is an
4001 unanalyzable reference, returns false, otherwise returns true.
4002 NEST is the outermost loop of the loop nest in which the references
4003 should be instantiated, LOOP is the loop in which the references
4004 should be analyzed. */
4005
4006 bool
4007 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple *stmt,
4008 vec<data_reference_p> *datarefs)
4009 {
4010 unsigned i;
4011 auto_vec<data_ref_loc, 2> references;
4012 data_ref_loc *ref;
4013 bool ret = true;
4014 data_reference_p dr;
4015
4016 if (get_references_in_stmt (stmt, &references))
4017 return false;
4018
4019 FOR_EACH_VEC_ELT (references, i, ref)
4020 {
4021 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read);
4022 gcc_assert (dr != NULL);
4023 datarefs->safe_push (dr);
4024 }
4025
4026 references.release ();
4027 return ret;
4028 }
4029
4030 /* Search the data references in the basic block BB, and record the
4031 information into DATAREFS. Returns chrec_dont_know when failing to
4032 analyze a difficult case, returns NULL_TREE otherwise. */
4033
4034 tree
4035 find_data_references_in_bb (struct loop *loop, basic_block bb,
4036 vec<data_reference_p> *datarefs)
4037 {
4038 gimple_stmt_iterator bsi;
4039
4040 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4041 {
4042 gimple *stmt = gsi_stmt (bsi);
4043
4044 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4045 {
4046 struct data_reference *res;
4047 res = XCNEW (struct data_reference);
4048 datarefs->safe_push (res);
4049
4050 return chrec_dont_know;
4051 }
4052 }
4053
4054 return NULL_TREE;
4055 }
4056
4057 /* Search the data references in LOOP, and record the information into
4058 DATAREFS. Returns chrec_dont_know when failing to analyze a
4059 difficult case, returns NULL_TREE otherwise.
4060
4061 TODO: This function should be made smarter so that it can handle address
4062 arithmetic as if it were an array access, etc. */
4063
4064 tree
4065 find_data_references_in_loop (struct loop *loop,
4066 vec<data_reference_p> *datarefs)
4067 {
4068 basic_block bb, *bbs;
4069 unsigned int i;
4070
4071 bbs = get_loop_body_in_dom_order (loop);
4072
4073 for (i = 0; i < loop->num_nodes; i++)
4074 {
4075 bb = bbs[i];
4076
4077 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4078 {
4079 free (bbs);
4080 return chrec_dont_know;
4081 }
4082 }
4083 free (bbs);
4084
4085 return NULL_TREE;
4086 }
4087
4088 /* Recursive helper for find_loop_nest: collect the loops of the nest, failing when an inner loop has a sibling. */
4089
4090 static bool
4091 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4092 {
4093 /* Inner loops of the nest should not contain siblings. Example:
4094 when there are two consecutive loops,
4095
4096 | loop_0
4097 | loop_1
4098 | A[{0, +, 1}_1]
4099 | endloop_1
4100 | loop_2
4101 | A[{0, +, 1}_2]
4102 | endloop_2
4103 | endloop_0
4104
4105 the dependence relation cannot be captured by the distance
4106 abstraction. */
4107 if (loop->next)
4108 return false;
4109
4110 loop_nest->safe_push (loop);
4111 if (loop->inner)
4112 return find_loop_nest_1 (loop->inner, loop_nest);
4113 return true;
4114 }
4115
4116 /* Return false when the LOOP is not well nested. Otherwise return
4117 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4118 contain the loops from the outermost to the innermost, as they will
4119 appear in the classic distance vector. */
4120
4121 bool
4122 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4123 {
4124 loop_nest->safe_push (loop);
4125 if (loop->inner)
4126 return find_loop_nest_1 (loop->inner, loop_nest);
4127 return true;
4128 }
4129
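/* For example, the perfectly nested

   | loop_0
   |   loop_1
   |     A[{0, +, 1}_1]
   |   endloop_1
   | endloop_0

   yields LOOP_NEST = (loop_0, loop_1), whereas the sibling-loop case
   illustrated in the comment of find_loop_nest_1 makes this function
   return false.  */
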
4130 /* Returns true when the data dependences have been computed, false otherwise.
4131 Given a loop nest LOOP, the following vectors are returned:
4132 DATAREFS is initialized to all the data references contained in this loop,
4133 DEPENDENCE_RELATIONS contains the relations between the data references.
4134 Compute read-read and self relations if
4135 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4136
4137 bool
4138 compute_data_dependences_for_loop (struct loop *loop,
4139 bool compute_self_and_read_read_dependences,
4140 vec<loop_p> *loop_nest,
4141 vec<data_reference_p> *datarefs,
4142 vec<ddr_p> *dependence_relations)
4143 {
4144 bool res = true;
4145
4146 memset (&dependence_stats, 0, sizeof (dependence_stats));
4147
4148 /* If the loop nest is not well formed, or one of the data references
4149 is not computable, give up without spending time to compute other
4150 dependences. */
4151 if (!loop
4152 || !find_loop_nest (loop, loop_nest)
4153 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4154 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4155 compute_self_and_read_read_dependences))
4156 res = false;
4157
4158 if (dump_file && (dump_flags & TDF_STATS))
4159 {
4160 fprintf (dump_file, "Dependence tester statistics:\n");
4161
4162 fprintf (dump_file, "Number of dependence tests: %d\n",
4163 dependence_stats.num_dependence_tests);
4164 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4165 dependence_stats.num_dependence_dependent);
4166 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4167 dependence_stats.num_dependence_independent);
4168 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4169 dependence_stats.num_dependence_undetermined);
4170
4171 fprintf (dump_file, "Number of subscript tests: %d\n",
4172 dependence_stats.num_subscript_tests);
4173 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4174 dependence_stats.num_subscript_undetermined);
4175 fprintf (dump_file, "Number of same subscript function: %d\n",
4176 dependence_stats.num_same_subscript_function);
4177
4178 fprintf (dump_file, "Number of ziv tests: %d\n",
4179 dependence_stats.num_ziv);
4180 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4181 dependence_stats.num_ziv_dependent);
4182 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4183 dependence_stats.num_ziv_independent);
4184 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4185 dependence_stats.num_ziv_unimplemented);
4186
4187 fprintf (dump_file, "Number of siv tests: %d\n",
4188 dependence_stats.num_siv);
4189 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4190 dependence_stats.num_siv_dependent);
4191 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4192 dependence_stats.num_siv_independent);
4193 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4194 dependence_stats.num_siv_unimplemented);
4195
4196 fprintf (dump_file, "Number of miv tests: %d\n",
4197 dependence_stats.num_miv);
4198 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4199 dependence_stats.num_miv_dependent);
4200 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4201 dependence_stats.num_miv_independent);
4202 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4203 dependence_stats.num_miv_unimplemented);
4204 }
4205
4206 return res;
4207 }
4208
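/* A short usage sketch for compute_data_dependences_for_loop, with
   illustrative variable names; it wraps the manual sequence shown
   after compute_all_dependences above:

   | auto_vec<loop_p, 3> nest;
   | vec<data_reference_p> datarefs = vNULL;
   | vec<ddr_p> ddrs = vNULL;
   | bool ok = compute_data_dependences_for_loop (loop, false, &nest,
   |                                              &datarefs, &ddrs);

   Whether or not OK is true, the caller remains responsible for
   releasing the results with free_dependence_relations and
   free_data_refs below.  */
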
4209 /* Free the memory used by a data dependence relation DDR. */
4210
4211 void
4212 free_dependence_relation (struct data_dependence_relation *ddr)
4213 {
4214 if (ddr == NULL)
4215 return;
4216
4217 if (DDR_SUBSCRIPTS (ddr).exists ())
4218 free_subscripts (DDR_SUBSCRIPTS (ddr));
4219 DDR_DIST_VECTS (ddr).release ();
4220 DDR_DIR_VECTS (ddr).release ();
4221
4222 free (ddr);
4223 }
4224
4225 /* Free the memory used by the data dependence relations from
4226 DEPENDENCE_RELATIONS. */
4227
4228 void
4229 free_dependence_relations (vec<ddr_p> dependence_relations)
4230 {
4231 unsigned int i;
4232 struct data_dependence_relation *ddr;
4233
4234 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4235 if (ddr)
4236 free_dependence_relation (ddr);
4237
4238 dependence_relations.release ();
4239 }
4240
4241 /* Free the memory used by the data references from DATAREFS. */
4242
4243 void
4244 free_data_refs (vec<data_reference_p> datarefs)
4245 {
4246 unsigned int i;
4247 struct data_reference *dr;
4248
4249 FOR_EACH_VEC_ELT (datarefs, i, dr)
4250 free_data_ref (dr);
4251 datarefs.release ();
4252 }