This patch rewrites the old VEC macro-based interface into a new one based on the...
[gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
31
32 The goals of this analysis are:
33
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
37
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
40
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
46
47 - to define a knowledge base for storing the data dependence
48 information,
49
50 - to define an interface to access this data.
51
52
53 Definitions:
54
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
64
65 References:
66
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
73
74
75 */
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "dumpfile.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
88 #include "params.h"
89
90 static struct datadep_stats
91 {
92 int num_dependence_tests;
93 int num_dependence_dependent;
94 int num_dependence_independent;
95 int num_dependence_undetermined;
96
97 int num_subscript_tests;
98 int num_subscript_undetermined;
99 int num_same_subscript_function;
100
101 int num_ziv;
102 int num_ziv_independent;
103 int num_ziv_dependent;
104 int num_ziv_unimplemented;
105
106 int num_siv;
107 int num_siv_independent;
108 int num_siv_dependent;
109 int num_siv_unimplemented;
110
111 int num_miv;
112 int num_miv_independent;
113 int num_miv_dependent;
114 int num_miv_unimplemented;
115 } dependence_stats;
116
117 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
118 struct data_reference *,
119 struct data_reference *,
120 struct loop *);
121 /* Returns true iff A divides B. */
122
123 static inline bool
124 tree_fold_divides_p (const_tree a, const_tree b)
125 {
126 gcc_assert (TREE_CODE (a) == INTEGER_CST);
127 gcc_assert (TREE_CODE (b) == INTEGER_CST);
128 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
129 }
130
131 /* Returns true iff A divides B. */
132
133 static inline bool
134 int_divides_p (int a, int b)
135 {
136 return ((b % a) == 0);
137 }
138
139 \f
140
141 /* Dump into FILE all the data references from DATAREFS. */
142
143 static void
144 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
145 {
146 unsigned int i;
147 struct data_reference *dr;
148
149 FOR_EACH_VEC_ELT (datarefs, i, dr)
150 dump_data_reference (file, dr);
151 }
152
153 /* Dump into STDERR all the data references from DATAREFS. */
154
155 DEBUG_FUNCTION void
156 debug_data_references (vec<data_reference_p> datarefs)
157 {
158 dump_data_references (stderr, datarefs);
159 }
160
161 /* Print to STDERR the data_reference DR. */
162
163 DEBUG_FUNCTION void
164 debug_data_reference (struct data_reference *dr)
165 {
166 dump_data_reference (stderr, dr);
167 }
168
169 /* Dump function for a DATA_REFERENCE structure. */
170
171 void
172 dump_data_reference (FILE *outf,
173 struct data_reference *dr)
174 {
175 unsigned int i;
176
177 fprintf (outf, "#(Data Ref: \n");
178 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
179 fprintf (outf, "# stmt: ");
180 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
181 fprintf (outf, "# ref: ");
182 print_generic_stmt (outf, DR_REF (dr), 0);
183 fprintf (outf, "# base_object: ");
184 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
185
186 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
187 {
188 fprintf (outf, "# Access function %d: ", i);
189 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
190 }
191 fprintf (outf, "#)\n");
192 }
193
194 /* Dumps the affine function described by FN to the file OUTF. */
195
196 static void
197 dump_affine_function (FILE *outf, affine_fn fn)
198 {
199 unsigned i;
200 tree coef;
201
202 print_generic_expr (outf, fn[0], TDF_SLIM);
203 for (i = 1; fn.iterate (i, &coef); i++)
204 {
205 fprintf (outf, " + ");
206 print_generic_expr (outf, coef, TDF_SLIM);
207 fprintf (outf, " * x_%u", i);
208 }
209 }
210
211 /* Dumps the conflict function CF to the file OUTF. */
212
213 static void
214 dump_conflict_function (FILE *outf, conflict_function *cf)
215 {
216 unsigned i;
217
218 if (cf->n == NO_DEPENDENCE)
219 fprintf (outf, "no dependence\n");
220 else if (cf->n == NOT_KNOWN)
221 fprintf (outf, "not known\n");
222 else
223 {
224 for (i = 0; i < cf->n; i++)
225 {
226 fprintf (outf, "[");
227 dump_affine_function (outf, cf->fns[i]);
228 fprintf (outf, "]\n");
229 }
230 }
231 }
232
233 /* Dump function for a SUBSCRIPT structure. */
234
235 static void
236 dump_subscript (FILE *outf, struct subscript *subscript)
237 {
238 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
239
240 fprintf (outf, "\n (subscript \n");
241 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
242 dump_conflict_function (outf, cf);
243 if (CF_NONTRIVIAL_P (cf))
244 {
245 tree last_iteration = SUB_LAST_CONFLICT (subscript);
246 fprintf (outf, " last_conflict: ");
247 print_generic_stmt (outf, last_iteration, 0);
248 }
249
250 cf = SUB_CONFLICTS_IN_B (subscript);
251 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
252 dump_conflict_function (outf, cf);
253 if (CF_NONTRIVIAL_P (cf))
254 {
255 tree last_iteration = SUB_LAST_CONFLICT (subscript);
256 fprintf (outf, " last_conflict: ");
257 print_generic_stmt (outf, last_iteration, 0);
258 }
259
260 fprintf (outf, " (Subscript distance: ");
261 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
262 fprintf (outf, " )\n");
263 fprintf (outf, " )\n");
264 }
265
266 /* Print the classic direction vector DIRV to OUTF. */
267
268 static void
269 print_direction_vector (FILE *outf,
270 lambda_vector dirv,
271 int length)
272 {
273 int eq;
274
275 for (eq = 0; eq < length; eq++)
276 {
277 enum data_dependence_direction dir = ((enum data_dependence_direction)
278 dirv[eq]);
279
280 switch (dir)
281 {
282 case dir_positive:
283 fprintf (outf, " +");
284 break;
285 case dir_negative:
286 fprintf (outf, " -");
287 break;
288 case dir_equal:
289 fprintf (outf, " =");
290 break;
291 case dir_positive_or_equal:
292 fprintf (outf, " +=");
293 break;
294 case dir_positive_or_negative:
295 fprintf (outf, " +-");
296 break;
297 case dir_negative_or_equal:
298 fprintf (outf, " -=");
299 break;
300 case dir_star:
301 fprintf (outf, " *");
302 break;
303 default:
304 fprintf (outf, "indep");
305 break;
306 }
307 }
308 fprintf (outf, "\n");
309 }
310
311 /* Print a vector of direction vectors. */
312
313 static void
314 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
315 int length)
316 {
317 unsigned j;
318 lambda_vector v;
319
320 FOR_EACH_VEC_ELT (dir_vects, j, v)
321 print_direction_vector (outf, v, length);
322 }
323
324 /* Print out a vector VEC of length N to OUTFILE. */
325
326 static inline void
327 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
328 {
329 int i;
330
331 for (i = 0; i < n; i++)
332 fprintf (outfile, "%3d ", vector[i]);
333 fprintf (outfile, "\n");
334 }
335
336 /* Print a vector of distance vectors. */
337
338 static void
339 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
340 int length)
341 {
342 unsigned j;
343 lambda_vector v;
344
345 FOR_EACH_VEC_ELT (dist_vects, j, v)
346 print_lambda_vector (outf, v, length);
347 }
348
349 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
350
351 static void
352 dump_data_dependence_relation (FILE *outf,
353 struct data_dependence_relation *ddr)
354 {
355 struct data_reference *dra, *drb;
356
357 fprintf (outf, "(Data Dep: \n");
358
359 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
360 {
361 if (ddr)
362 {
363 dra = DDR_A (ddr);
364 drb = DDR_B (ddr);
365 if (dra)
366 dump_data_reference (outf, dra);
367 else
368 fprintf (outf, " (nil)\n");
369 if (drb)
370 dump_data_reference (outf, drb);
371 else
372 fprintf (outf, " (nil)\n");
373 }
374 fprintf (outf, " (don't know)\n)\n");
375 return;
376 }
377
378 dra = DDR_A (ddr);
379 drb = DDR_B (ddr);
380 dump_data_reference (outf, dra);
381 dump_data_reference (outf, drb);
382
383 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
384 fprintf (outf, " (no dependence)\n");
385
386 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
387 {
388 unsigned int i;
389 struct loop *loopi;
390
391 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
392 {
393 fprintf (outf, " access_fn_A: ");
394 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
395 fprintf (outf, " access_fn_B: ");
396 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
397 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
398 }
399
400 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
401 fprintf (outf, " loop nest: (");
402 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
403 fprintf (outf, "%d ", loopi->num);
404 fprintf (outf, ")\n");
405
406 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
407 {
408 fprintf (outf, " distance_vector: ");
409 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
410 DDR_NB_LOOPS (ddr));
411 }
412
413 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
414 {
415 fprintf (outf, " direction_vector: ");
416 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
417 DDR_NB_LOOPS (ddr));
418 }
419 }
420
421 fprintf (outf, ")\n");
422 }
423
424 /* Debug version. */
425
426 DEBUG_FUNCTION void
427 debug_data_dependence_relation (struct data_dependence_relation *ddr)
428 {
429 dump_data_dependence_relation (stderr, ddr);
430 }
431
432 /* Dump into FILE all the dependence relations from DDRS. */
433
434 void
435 dump_data_dependence_relations (FILE *file,
436 vec<ddr_p> ddrs)
437 {
438 unsigned int i;
439 struct data_dependence_relation *ddr;
440
441 FOR_EACH_VEC_ELT (ddrs, i, ddr)
442 dump_data_dependence_relation (file, ddr);
443 }
444
445 /* Dump to STDERR all the dependence relations from DDRS. */
446
447 DEBUG_FUNCTION void
448 debug_data_dependence_relations (vec<ddr_p> ddrs)
449 {
450 dump_data_dependence_relations (stderr, ddrs);
451 }
452
453 /* Dumps the distance and direction vectors in FILE. DDRS contains
454 the dependence relations, and VECT_SIZE is the size of the
455 dependence vectors, or in other words the number of loops in the
456 considered nest. */
457
458 static void
459 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
460 {
461 unsigned int i, j;
462 struct data_dependence_relation *ddr;
463 lambda_vector v;
464
465 FOR_EACH_VEC_ELT (ddrs, i, ddr)
466 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
467 {
468 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
469 {
470 fprintf (file, "DISTANCE_V (");
471 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
472 fprintf (file, ")\n");
473 }
474
475 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
476 {
477 fprintf (file, "DIRECTION_V (");
478 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
479 fprintf (file, ")\n");
480 }
481 }
482
483 fprintf (file, "\n\n");
484 }
485
486 /* Dumps the data dependence relations DDRS in FILE. */
487
488 static void
489 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
490 {
491 unsigned int i;
492 struct data_dependence_relation *ddr;
493
494 FOR_EACH_VEC_ELT (ddrs, i, ddr)
495 dump_data_dependence_relation (file, ddr);
496
497 fprintf (file, "\n\n");
498 }
499
500 DEBUG_FUNCTION void
501 debug_ddrs (vec<ddr_p> ddrs)
502 {
503 dump_ddrs (stderr, ddrs);
504 }
505
506 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
507 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
508 constant of type ssizetype, and returns true. If we cannot do this
509 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
510 is returned. */
511
512 static bool
513 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
514 tree *var, tree *off)
515 {
516 tree var0, var1;
517 tree off0, off1;
518 enum tree_code ocode = code;
519
520 *var = NULL_TREE;
521 *off = NULL_TREE;
522
523 switch (code)
524 {
525 case INTEGER_CST:
526 *var = build_int_cst (type, 0);
527 *off = fold_convert (ssizetype, op0);
528 return true;
529
530 case POINTER_PLUS_EXPR:
531 ocode = PLUS_EXPR;
532 /* FALLTHROUGH */
533 case PLUS_EXPR:
534 case MINUS_EXPR:
535 split_constant_offset (op0, &var0, &off0);
536 split_constant_offset (op1, &var1, &off1);
537 *var = fold_build2 (code, type, var0, var1);
538 *off = size_binop (ocode, off0, off1);
539 return true;
540
541 case MULT_EXPR:
542 if (TREE_CODE (op1) != INTEGER_CST)
543 return false;
544
545 split_constant_offset (op0, &var0, &off0);
546 *var = fold_build2 (MULT_EXPR, type, var0, op1);
547 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
548 return true;
549
550 case ADDR_EXPR:
551 {
552 tree base, poffset;
553 HOST_WIDE_INT pbitsize, pbitpos;
554 enum machine_mode pmode;
555 int punsignedp, pvolatilep;
556
557 op0 = TREE_OPERAND (op0, 0);
558 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
559 &pmode, &punsignedp, &pvolatilep, false);
560
561 if (pbitpos % BITS_PER_UNIT != 0)
562 return false;
563 base = build_fold_addr_expr (base);
564 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
565
566 if (poffset)
567 {
568 split_constant_offset (poffset, &poffset, &off1);
569 off0 = size_binop (PLUS_EXPR, off0, off1);
570 if (POINTER_TYPE_P (TREE_TYPE (base)))
571 base = fold_build_pointer_plus (base, poffset);
572 else
573 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
574 fold_convert (TREE_TYPE (base), poffset));
575 }
576
577 var0 = fold_convert (type, base);
578
579 /* If variable length types are involved, punt, otherwise casts
580 might be converted into ARRAY_REFs in gimplify_conversion.
581 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
582 possibly no longer appears in current GIMPLE, might resurface.
583 This perhaps could run
584 if (CONVERT_EXPR_P (var0))
585 {
586 gimplify_conversion (&var0);
587 // Attempt to fill in any within var0 found ARRAY_REF's
588 // element size from corresponding op embedded ARRAY_REF,
589 // if unsuccessful, just punt.
590 } */
591 while (POINTER_TYPE_P (type))
592 type = TREE_TYPE (type);
593 if (int_size_in_bytes (type) < 0)
594 return false;
595
596 *var = var0;
597 *off = off0;
598 return true;
599 }
600
601 case SSA_NAME:
602 {
603 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
604 enum tree_code subcode;
605
606 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
607 return false;
608
609 var0 = gimple_assign_rhs1 (def_stmt);
610 subcode = gimple_assign_rhs_code (def_stmt);
611 var1 = gimple_assign_rhs2 (def_stmt);
612
613 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
614 }
615 CASE_CONVERT:
616 {
617 /* We must not introduce undefined overflow, and we must not change the value.
618 Hence we're okay if the inner type doesn't overflow to start with
619 (pointer or signed), the outer type also is an integer or pointer
620 and the outer precision is at least as large as the inner. */
621 tree itype = TREE_TYPE (op0);
622 if ((POINTER_TYPE_P (itype)
623 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
624 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
625 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
626 {
627 split_constant_offset (op0, &var0, off);
628 *var = fold_convert (type, var0);
629 return true;
630 }
631 return false;
632 }
633
634 default:
635 return false;
636 }
637 }
638
639 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
640 will be ssizetype. */
641
642 void
643 split_constant_offset (tree exp, tree *var, tree *off)
644 {
645 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
646 enum tree_code code;
647
648 *var = exp;
649 *off = ssize_int (0);
650 STRIP_NOPS (exp);
651
652 if (tree_is_chrec (exp)
653 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
654 return;
655
656 otype = TREE_TYPE (exp);
657 code = TREE_CODE (exp);
658 extract_ops_from_tree (exp, &code, &op0, &op1);
659 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
660 {
661 *var = fold_convert (type, e);
662 *off = o;
663 }
664 }
665
666 /* Returns the address ADDR of an object in a canonical shape (without nop
667 casts, and with type of pointer to the object). */
668
669 static tree
670 canonicalize_base_object_address (tree addr)
671 {
672 tree orig = addr;
673
674 STRIP_NOPS (addr);
675
676 /* The base address may be obtained by casting from integer, in that case
677 keep the cast. */
678 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
679 return orig;
680
681 if (TREE_CODE (addr) != ADDR_EXPR)
682 return addr;
683
684 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
685 }
686
687 /* Analyzes the behavior of the memory reference DR in the innermost loop or
688 basic block that contains it. Returns true if analysis succeed or false
689 otherwise. */
690
691 bool
692 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
693 {
694 gimple stmt = DR_STMT (dr);
695 struct loop *loop = loop_containing_stmt (stmt);
696 tree ref = DR_REF (dr);
697 HOST_WIDE_INT pbitsize, pbitpos;
698 tree base, poffset;
699 enum machine_mode pmode;
700 int punsignedp, pvolatilep;
701 affine_iv base_iv, offset_iv;
702 tree init, dinit, step;
703 bool in_loop = (loop && loop->num);
704
705 if (dump_file && (dump_flags & TDF_DETAILS))
706 fprintf (dump_file, "analyze_innermost: ");
707
708 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
709 &pmode, &punsignedp, &pvolatilep, false);
710 gcc_assert (base != NULL_TREE);
711
712 if (pbitpos % BITS_PER_UNIT != 0)
713 {
714 if (dump_file && (dump_flags & TDF_DETAILS))
715 fprintf (dump_file, "failed: bit offset alignment.\n");
716 return false;
717 }
718
719 if (TREE_CODE (base) == MEM_REF)
720 {
721 if (!integer_zerop (TREE_OPERAND (base, 1)))
722 {
723 double_int moff = mem_ref_offset (base);
724 tree mofft = double_int_to_tree (sizetype, moff);
725 if (!poffset)
726 poffset = mofft;
727 else
728 poffset = size_binop (PLUS_EXPR, poffset, mofft);
729 }
730 base = TREE_OPERAND (base, 0);
731 }
732 else
733 base = build_fold_addr_expr (base);
734
735 if (in_loop)
736 {
737 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
738 nest ? true : false))
739 {
740 if (nest)
741 {
742 if (dump_file && (dump_flags & TDF_DETAILS))
743 fprintf (dump_file, "failed: evolution of base is not"
744 " affine.\n");
745 return false;
746 }
747 else
748 {
749 base_iv.base = base;
750 base_iv.step = ssize_int (0);
751 base_iv.no_overflow = true;
752 }
753 }
754 }
755 else
756 {
757 base_iv.base = base;
758 base_iv.step = ssize_int (0);
759 base_iv.no_overflow = true;
760 }
761
762 if (!poffset)
763 {
764 offset_iv.base = ssize_int (0);
765 offset_iv.step = ssize_int (0);
766 }
767 else
768 {
769 if (!in_loop)
770 {
771 offset_iv.base = poffset;
772 offset_iv.step = ssize_int (0);
773 }
774 else if (!simple_iv (loop, loop_containing_stmt (stmt),
775 poffset, &offset_iv,
776 nest ? true : false))
777 {
778 if (nest)
779 {
780 if (dump_file && (dump_flags & TDF_DETAILS))
781 fprintf (dump_file, "failed: evolution of offset is not"
782 " affine.\n");
783 return false;
784 }
785 else
786 {
787 offset_iv.base = poffset;
788 offset_iv.step = ssize_int (0);
789 }
790 }
791 }
792
793 init = ssize_int (pbitpos / BITS_PER_UNIT);
794 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
795 init = size_binop (PLUS_EXPR, init, dinit);
796 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
797 init = size_binop (PLUS_EXPR, init, dinit);
798
799 step = size_binop (PLUS_EXPR,
800 fold_convert (ssizetype, base_iv.step),
801 fold_convert (ssizetype, offset_iv.step));
802
803 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
804
805 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
806 DR_INIT (dr) = init;
807 DR_STEP (dr) = step;
808
809 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
810
811 if (dump_file && (dump_flags & TDF_DETAILS))
812 fprintf (dump_file, "success.\n");
813
814 return true;
815 }
816
817 /* Determines the base object and the list of indices of memory reference
818 DR, analyzed in LOOP and instantiated in loop nest NEST. */
819
820 static void
821 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
822 {
823 vec<tree> access_fns = vec<tree>();
824 tree ref, op;
825 tree base, off, access_fn;
826 basic_block before_loop;
827
828 /* If analyzing a basic-block there are no indices to analyze
829 and thus no access functions. */
830 if (!nest)
831 {
832 DR_BASE_OBJECT (dr) = DR_REF (dr);
833 DR_ACCESS_FNS (dr).create (0);
834 return;
835 }
836
837 ref = DR_REF (dr);
838 before_loop = block_before_loop (nest);
839
840 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
841 into a two element array with a constant index. The base is
842 then just the immediate underlying object. */
843 if (TREE_CODE (ref) == REALPART_EXPR)
844 {
845 ref = TREE_OPERAND (ref, 0);
846 access_fns.safe_push (integer_zero_node);
847 }
848 else if (TREE_CODE (ref) == IMAGPART_EXPR)
849 {
850 ref = TREE_OPERAND (ref, 0);
851 access_fns.safe_push (integer_one_node);
852 }
853
854 /* Analyze access functions of dimensions we know to be independent. */
855 while (handled_component_p (ref))
856 {
857 if (TREE_CODE (ref) == ARRAY_REF)
858 {
859 op = TREE_OPERAND (ref, 1);
860 access_fn = analyze_scalar_evolution (loop, op);
861 access_fn = instantiate_scev (before_loop, loop, access_fn);
862 access_fns.safe_push (access_fn);
863 }
864 else if (TREE_CODE (ref) == COMPONENT_REF
865 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
866 {
867 /* For COMPONENT_REFs of records (but not unions!) use the
868 FIELD_DECL offset as constant access function so we can
869 disambiguate a[i].f1 and a[i].f2. */
870 tree off = component_ref_field_offset (ref);
871 off = size_binop (PLUS_EXPR,
872 size_binop (MULT_EXPR,
873 fold_convert (bitsizetype, off),
874 bitsize_int (BITS_PER_UNIT)),
875 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
876 access_fns.safe_push (off);
877 }
878 else
879 /* If we have an unhandled component we could not translate
880 to an access function stop analyzing. We have determined
881 our base object in this case. */
882 break;
883
884 ref = TREE_OPERAND (ref, 0);
885 }
886
887 /* If the address operand of a MEM_REF base has an evolution in the
888 analyzed nest, add it as an additional independent access-function. */
889 if (TREE_CODE (ref) == MEM_REF)
890 {
891 op = TREE_OPERAND (ref, 0);
892 access_fn = analyze_scalar_evolution (loop, op);
893 access_fn = instantiate_scev (before_loop, loop, access_fn);
894 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
895 {
896 tree orig_type;
897 tree memoff = TREE_OPERAND (ref, 1);
898 base = initial_condition (access_fn);
899 orig_type = TREE_TYPE (base);
900 STRIP_USELESS_TYPE_CONVERSION (base);
901 split_constant_offset (base, &base, &off);
902 /* Fold the MEM_REF offset into the evolutions initial
903 value to make more bases comparable. */
904 if (!integer_zerop (memoff))
905 {
906 off = size_binop (PLUS_EXPR, off,
907 fold_convert (ssizetype, memoff));
908 memoff = build_int_cst (TREE_TYPE (memoff), 0);
909 }
910 access_fn = chrec_replace_initial_condition
911 (access_fn, fold_convert (orig_type, off));
912 /* ??? This is still not a suitable base object for
913 dr_may_alias_p - the base object needs to be an
914 access that covers the object as whole. With
915 an evolution in the pointer this cannot be
916 guaranteed.
917 As a band-aid, mark the access so we can special-case
918 it in dr_may_alias_p. */
919 ref = fold_build2_loc (EXPR_LOCATION (ref),
920 MEM_REF, TREE_TYPE (ref),
921 base, memoff);
922 DR_UNCONSTRAINED_BASE (dr) = true;
923 access_fns.safe_push (access_fn);
924 }
925 }
926 else if (DECL_P (ref))
927 {
928 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
929 ref = build2 (MEM_REF, TREE_TYPE (ref),
930 build_fold_addr_expr (ref),
931 build_int_cst (reference_alias_ptr_type (ref), 0));
932 }
933
934 DR_BASE_OBJECT (dr) = ref;
935 DR_ACCESS_FNS (dr) = access_fns;
936 }
937
938 /* Extracts the alias analysis information from the memory reference DR. */
939
940 static void
941 dr_analyze_alias (struct data_reference *dr)
942 {
943 tree ref = DR_REF (dr);
944 tree base = get_base_address (ref), addr;
945
946 if (INDIRECT_REF_P (base)
947 || TREE_CODE (base) == MEM_REF)
948 {
949 addr = TREE_OPERAND (base, 0);
950 if (TREE_CODE (addr) == SSA_NAME)
951 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
952 }
953 }
954
955 /* Frees data reference DR. */
956
957 void
958 free_data_ref (data_reference_p dr)
959 {
960 DR_ACCESS_FNS (dr).release ();
961 free (dr);
962 }
963
964 /* Analyzes memory reference MEMREF accessed in STMT. The reference
965 is read if IS_READ is true, write otherwise. Returns the
966 data_reference description of MEMREF. NEST is the outermost loop
967 in which the reference should be instantiated, LOOP is the loop in
968 which the data reference should be analyzed. */
969
970 struct data_reference *
971 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
972 bool is_read)
973 {
974 struct data_reference *dr;
975
976 if (dump_file && (dump_flags & TDF_DETAILS))
977 {
978 fprintf (dump_file, "Creating dr for ");
979 print_generic_expr (dump_file, memref, TDF_SLIM);
980 fprintf (dump_file, "\n");
981 }
982
983 dr = XCNEW (struct data_reference);
984 DR_STMT (dr) = stmt;
985 DR_REF (dr) = memref;
986 DR_IS_READ (dr) = is_read;
987
988 dr_analyze_innermost (dr, nest);
989 dr_analyze_indices (dr, nest, loop);
990 dr_analyze_alias (dr);
991
992 if (dump_file && (dump_flags & TDF_DETAILS))
993 {
994 unsigned i;
995 fprintf (dump_file, "\tbase_address: ");
996 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
997 fprintf (dump_file, "\n\toffset from base address: ");
998 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
999 fprintf (dump_file, "\n\tconstant offset from base address: ");
1000 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1001 fprintf (dump_file, "\n\tstep: ");
1002 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1003 fprintf (dump_file, "\n\taligned to: ");
1004 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1005 fprintf (dump_file, "\n\tbase_object: ");
1006 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1007 fprintf (dump_file, "\n");
1008 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1009 {
1010 fprintf (dump_file, "\tAccess function %d: ", i);
1011 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1012 }
1013 }
1014
1015 return dr;
1016 }
1017
1018 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1019 expressions. */
1020 static bool
1021 dr_equal_offsets_p1 (tree offset1, tree offset2)
1022 {
1023 bool res;
1024
1025 STRIP_NOPS (offset1);
1026 STRIP_NOPS (offset2);
1027
1028 if (offset1 == offset2)
1029 return true;
1030
1031 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1032 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1033 return false;
1034
1035 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1036 TREE_OPERAND (offset2, 0));
1037
1038 if (!res || !BINARY_CLASS_P (offset1))
1039 return res;
1040
1041 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1042 TREE_OPERAND (offset2, 1));
1043
1044 return res;
1045 }
1046
1047 /* Check if DRA and DRB have equal offsets. */
1048 bool
1049 dr_equal_offsets_p (struct data_reference *dra,
1050 struct data_reference *drb)
1051 {
1052 tree offset1, offset2;
1053
1054 offset1 = DR_OFFSET (dra);
1055 offset2 = DR_OFFSET (drb);
1056
1057 return dr_equal_offsets_p1 (offset1, offset2);
1058 }
1059
1060 /* Returns true if FNA == FNB. */
1061
1062 static bool
1063 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1064 {
1065 unsigned i, n = fna.length ();
1066
1067 if (n != fnb.length ())
1068 return false;
1069
1070 for (i = 0; i < n; i++)
1071 if (!operand_equal_p (fna[i], fnb[i], 0))
1072 return false;
1073
1074 return true;
1075 }
1076
1077 /* If all the functions in CF are the same, returns one of them,
1078 otherwise returns NULL. */
1079
1080 static affine_fn
1081 common_affine_function (conflict_function *cf)
1082 {
1083 unsigned i;
1084 affine_fn comm;
1085
1086 if (!CF_NONTRIVIAL_P (cf))
1087 return affine_fn();
1088
1089 comm = cf->fns[0];
1090
1091 for (i = 1; i < cf->n; i++)
1092 if (!affine_function_equal_p (comm, cf->fns[i]))
1093 return affine_fn();
1094
1095 return comm;
1096 }
1097
1098 /* Returns the base of the affine function FN. */
1099
1100 static tree
1101 affine_function_base (affine_fn fn)
1102 {
1103 return fn[0];
1104 }
1105
1106 /* Returns true if FN is a constant. */
1107
1108 static bool
1109 affine_function_constant_p (affine_fn fn)
1110 {
1111 unsigned i;
1112 tree coef;
1113
1114 for (i = 1; fn.iterate (i, &coef); i++)
1115 if (!integer_zerop (coef))
1116 return false;
1117
1118 return true;
1119 }
1120
1121 /* Returns true if FN is the zero constant function. */
1122
1123 static bool
1124 affine_function_zero_p (affine_fn fn)
1125 {
1126 return (integer_zerop (affine_function_base (fn))
1127 && affine_function_constant_p (fn));
1128 }
1129
1130 /* Returns a signed integer type with the largest precision from TA
1131 and TB. */
1132
1133 static tree
1134 signed_type_for_types (tree ta, tree tb)
1135 {
1136 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1137 return signed_type_for (ta);
1138 else
1139 return signed_type_for (tb);
1140 }
1141
1142 /* Applies operation OP on affine functions FNA and FNB, and returns the
1143 result. */
1144
1145 static affine_fn
1146 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1147 {
1148 unsigned i, n, m;
1149 affine_fn ret;
1150 tree coef;
1151
1152 if (fnb.length () > fna.length ())
1153 {
1154 n = fna.length ();
1155 m = fnb.length ();
1156 }
1157 else
1158 {
1159 n = fnb.length ();
1160 m = fna.length ();
1161 }
1162
1163 ret.create (m);
1164 for (i = 0; i < n; i++)
1165 {
1166 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1167 TREE_TYPE (fnb[i]));
1168 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1169 }
1170
1171 for (; fna.iterate (i, &coef); i++)
1172 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1173 coef, integer_zero_node));
1174 for (; fnb.iterate (i, &coef); i++)
1175 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1176 integer_zero_node, coef));
1177
1178 return ret;
1179 }
1180
1181 /* Returns the sum of affine functions FNA and FNB. */
1182
1183 static affine_fn
1184 affine_fn_plus (affine_fn fna, affine_fn fnb)
1185 {
1186 return affine_fn_op (PLUS_EXPR, fna, fnb);
1187 }
1188
1189 /* Returns the difference of affine functions FNA and FNB. */
1190
1191 static affine_fn
1192 affine_fn_minus (affine_fn fna, affine_fn fnb)
1193 {
1194 return affine_fn_op (MINUS_EXPR, fna, fnb);
1195 }
1196
1197 /* Frees affine function FN. */
1198
1199 static void
1200 affine_fn_free (affine_fn fn)
1201 {
1202 fn.release ();
1203 }
1204
1205 /* Determine for each subscript in the data dependence relation DDR
1206 the distance. */
1207
1208 static void
1209 compute_subscript_distance (struct data_dependence_relation *ddr)
1210 {
1211 conflict_function *cf_a, *cf_b;
1212 affine_fn fn_a, fn_b, diff;
1213
1214 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1215 {
1216 unsigned int i;
1217
1218 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1219 {
1220 struct subscript *subscript;
1221
1222 subscript = DDR_SUBSCRIPT (ddr, i);
1223 cf_a = SUB_CONFLICTS_IN_A (subscript);
1224 cf_b = SUB_CONFLICTS_IN_B (subscript);
1225
1226 fn_a = common_affine_function (cf_a);
1227 fn_b = common_affine_function (cf_b);
1228 if (!fn_a.exists () || !fn_b.exists ())
1229 {
1230 SUB_DISTANCE (subscript) = chrec_dont_know;
1231 return;
1232 }
1233 diff = affine_fn_minus (fn_a, fn_b);
1234
1235 if (affine_function_constant_p (diff))
1236 SUB_DISTANCE (subscript) = affine_function_base (diff);
1237 else
1238 SUB_DISTANCE (subscript) = chrec_dont_know;
1239
1240 affine_fn_free (diff);
1241 }
1242 }
1243 }
1244
1245 /* Returns the conflict function for "unknown". */
1246
1247 static conflict_function *
1248 conflict_fn_not_known (void)
1249 {
1250 conflict_function *fn = XCNEW (conflict_function);
1251 fn->n = NOT_KNOWN;
1252
1253 return fn;
1254 }
1255
1256 /* Returns the conflict function for "independent". */
1257
1258 static conflict_function *
1259 conflict_fn_no_dependence (void)
1260 {
1261 conflict_function *fn = XCNEW (conflict_function);
1262 fn->n = NO_DEPENDENCE;
1263
1264 return fn;
1265 }
1266
1267 /* Returns true if the address of OBJ is invariant in LOOP. */
1268
1269 static bool
1270 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1271 {
1272 while (handled_component_p (obj))
1273 {
1274 if (TREE_CODE (obj) == ARRAY_REF)
1275 {
1276 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1277 need to check the stride and the lower bound of the reference. */
1278 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1279 loop->num)
1280 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1281 loop->num))
1282 return false;
1283 }
1284 else if (TREE_CODE (obj) == COMPONENT_REF)
1285 {
1286 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1287 loop->num))
1288 return false;
1289 }
1290 obj = TREE_OPERAND (obj, 0);
1291 }
1292
1293 if (!INDIRECT_REF_P (obj)
1294 && TREE_CODE (obj) != MEM_REF)
1295 return true;
1296
1297 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1298 loop->num);
1299 }
1300
1301 /* Returns false if we can prove that data references A and B do not alias,
1302 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1303 considered. */
1304
1305 bool
1306 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1307 bool loop_nest)
1308 {
1309 tree addr_a = DR_BASE_OBJECT (a);
1310 tree addr_b = DR_BASE_OBJECT (b);
1311
1312 /* If we are not processing a loop nest but scalar code we
1313 do not need to care about possible cross-iteration dependences
1314 and thus can process the full original reference. Do so,
1315 similar to how loop invariant motion applies extra offset-based
1316 disambiguation. */
1317 if (!loop_nest)
1318 {
1319 aff_tree off1, off2;
1320 double_int size1, size2;
1321 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1322 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1323 aff_combination_scale (&off1, double_int_minus_one);
1324 aff_combination_add (&off2, &off1);
1325 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1326 return false;
1327 }
1328
1329 /* If we had an evolution in a MEM_REF BASE_OBJECT we do not know
1330 the size of the base-object. So we cannot do any offset/overlap
1331 based analysis but have to rely on points-to information only. */
1332 if (TREE_CODE (addr_a) == MEM_REF
1333 && DR_UNCONSTRAINED_BASE (a))
1334 {
1335 if (TREE_CODE (addr_b) == MEM_REF
1336 && DR_UNCONSTRAINED_BASE (b))
1337 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1338 TREE_OPERAND (addr_b, 0));
1339 else
1340 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1341 build_fold_addr_expr (addr_b));
1342 }
1343 else if (TREE_CODE (addr_b) == MEM_REF
1344 && DR_UNCONSTRAINED_BASE (b))
1345 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1346 TREE_OPERAND (addr_b, 0));
1347
1348 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1349 that is being subsetted in the loop nest. */
1350 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1351 return refs_output_dependent_p (addr_a, addr_b);
1352 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1353 return refs_anti_dependent_p (addr_a, addr_b);
1354 return refs_may_alias_p (addr_a, addr_b);
1355 }
1356
1357 /* Initialize a data dependence relation between data accesses A and
1358 B. NB_LOOPS is the number of loops surrounding the references: the
1359 size of the classic distance/direction vectors. */
1360
1361 struct data_dependence_relation *
1362 initialize_data_dependence_relation (struct data_reference *a,
1363 struct data_reference *b,
1364 vec<loop_p> loop_nest)
1365 {
1366 struct data_dependence_relation *res;
1367 unsigned int i;
1368
1369 res = XNEW (struct data_dependence_relation);
1370 DDR_A (res) = a;
1371 DDR_B (res) = b;
1372 DDR_LOOP_NEST (res).create (0);
1373 DDR_REVERSED_P (res) = false;
1374 DDR_SUBSCRIPTS (res).create (0);
1375 DDR_DIR_VECTS (res).create (0);
1376 DDR_DIST_VECTS (res).create (0);
1377
1378 if (a == NULL || b == NULL)
1379 {
1380 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1381 return res;
1382 }
1383
1384 /* If the data references do not alias, then they are independent. */
1385 if (!dr_may_alias_p (a, b, loop_nest.exists ()))
1386 {
1387 DDR_ARE_DEPENDENT (res) = chrec_known;
1388 return res;
1389 }
1390
1391 /* The case where the references are exactly the same. */
1392 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1393 {
1394 if (loop_nest.exists ()
1395 && !object_address_invariant_in_loop_p (loop_nest[0],
1396 DR_BASE_OBJECT (a)))
1397 {
1398 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1399 return res;
1400 }
1401 DDR_AFFINE_P (res) = true;
1402 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1403 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1404 DDR_LOOP_NEST (res) = loop_nest;
1405 DDR_INNER_LOOP (res) = 0;
1406 DDR_SELF_REFERENCE (res) = true;
1407 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1408 {
1409 struct subscript *subscript;
1410
1411 subscript = XNEW (struct subscript);
1412 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1413 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1414 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1415 SUB_DISTANCE (subscript) = chrec_dont_know;
1416 DDR_SUBSCRIPTS (res).safe_push (subscript);
1417 }
1418 return res;
1419 }
1420
1421 /* If the references do not access the same object, we do not know
1422 whether they alias or not. */
1423 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1424 {
1425 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1426 return res;
1427 }
1428
1429 /* If the base of the object is not invariant in the loop nest, we cannot
1430 analyze it. TODO -- in fact, it would suffice to record that there may
1431 be arbitrary dependences in the loops where the base object varies. */
1432 if (loop_nest.exists ()
1433 && !object_address_invariant_in_loop_p (loop_nest[0],
1434 DR_BASE_OBJECT (a)))
1435 {
1436 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1437 return res;
1438 }
1439
1440 /* If the number of dimensions of the access to not agree we can have
1441 a pointer access to a component of the array element type and an
1442 array access while the base-objects are still the same. Punt. */
1443 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1444 {
1445 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1446 return res;
1447 }
1448
1449 DDR_AFFINE_P (res) = true;
1450 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1451 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1452 DDR_LOOP_NEST (res) = loop_nest;
1453 DDR_INNER_LOOP (res) = 0;
1454 DDR_SELF_REFERENCE (res) = false;
1455
1456 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1457 {
1458 struct subscript *subscript;
1459
1460 subscript = XNEW (struct subscript);
1461 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1462 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1463 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1464 SUB_DISTANCE (subscript) = chrec_dont_know;
1465 DDR_SUBSCRIPTS (res).safe_push (subscript);
1466 }
1467
1468 return res;
1469 }
1470
1471 /* Frees memory used by the conflict function F. */
1472
1473 static void
1474 free_conflict_function (conflict_function *f)
1475 {
1476 unsigned i;
1477
1478 if (CF_NONTRIVIAL_P (f))
1479 {
1480 for (i = 0; i < f->n; i++)
1481 affine_fn_free (f->fns[i]);
1482 }
1483 free (f);
1484 }
1485
1486 /* Frees memory used by SUBSCRIPTS. */
1487
1488 static void
1489 free_subscripts (vec<subscript_p> subscripts)
1490 {
1491 unsigned i;
1492 subscript_p s;
1493
1494 FOR_EACH_VEC_ELT (subscripts, i, s)
1495 {
1496 free_conflict_function (s->conflicting_iterations_in_a);
1497 free_conflict_function (s->conflicting_iterations_in_b);
1498 free (s);
1499 }
1500 subscripts.release ();
1501 }
1502
1503 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1504 description. */
1505
1506 static inline void
1507 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1508 tree chrec)
1509 {
1510 if (dump_file && (dump_flags & TDF_DETAILS))
1511 {
1512 fprintf (dump_file, "(dependence classified: ");
1513 print_generic_expr (dump_file, chrec, 0);
1514 fprintf (dump_file, ")\n");
1515 }
1516
1517 DDR_ARE_DEPENDENT (ddr) = chrec;
1518 free_subscripts (DDR_SUBSCRIPTS (ddr));
1519 DDR_SUBSCRIPTS (ddr).create (0);
1520 }
1521
1522 /* The dependence relation DDR cannot be represented by a distance
1523 vector. */
1524
1525 static inline void
1526 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1527 {
1528 if (dump_file && (dump_flags & TDF_DETAILS))
1529 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1530
1531 DDR_AFFINE_P (ddr) = false;
1532 }
1533
1534 \f
1535
1536 /* This section contains the classic Banerjee tests. */
1537
1538 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1539 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1540
1541 static inline bool
1542 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1543 {
1544 return (evolution_function_is_constant_p (chrec_a)
1545 && evolution_function_is_constant_p (chrec_b));
1546 }
1547
1548 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1549 variable, i.e., if the SIV (Single Index Variable) test is true. */
1550
1551 static bool
1552 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1553 {
1554 if ((evolution_function_is_constant_p (chrec_a)
1555 && evolution_function_is_univariate_p (chrec_b))
1556 || (evolution_function_is_constant_p (chrec_b)
1557 && evolution_function_is_univariate_p (chrec_a)))
1558 return true;
1559
1560 if (evolution_function_is_univariate_p (chrec_a)
1561 && evolution_function_is_univariate_p (chrec_b))
1562 {
1563 switch (TREE_CODE (chrec_a))
1564 {
1565 case POLYNOMIAL_CHREC:
1566 switch (TREE_CODE (chrec_b))
1567 {
1568 case POLYNOMIAL_CHREC:
1569 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1570 return false;
1571
1572 default:
1573 return true;
1574 }
1575
1576 default:
1577 return true;
1578 }
1579 }
1580
1581 return false;
1582 }
1583
1584 /* Creates a conflict function with N dimensions. The affine functions
1585 in each dimension follow. */
1586
1587 static conflict_function *
1588 conflict_fn (unsigned n, ...)
1589 {
1590 unsigned i;
1591 conflict_function *ret = XCNEW (conflict_function);
1592 va_list ap;
1593
1594 gcc_assert (0 < n && n <= MAX_DIM);
1595 va_start(ap, n);
1596
1597 ret->n = n;
1598 for (i = 0; i < n; i++)
1599 ret->fns[i] = va_arg (ap, affine_fn);
1600 va_end(ap);
1601
1602 return ret;
1603 }
1604
1605 /* Returns constant affine function with value CST. */
1606
1607 static affine_fn
1608 affine_fn_cst (tree cst)
1609 {
1610 affine_fn fn;
1611 fn.create (1);
1612 fn.quick_push (cst);
1613 return fn;
1614 }
1615
1616 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1617
1618 static affine_fn
1619 affine_fn_univar (tree cst, unsigned dim, tree coef)
1620 {
1621 affine_fn fn;
1622 fn.create (dim + 1);
1623 unsigned i;
1624
1625 gcc_assert (dim > 0);
1626 fn.quick_push (cst);
1627 for (i = 1; i < dim; i++)
1628 fn.quick_push (integer_zero_node);
1629 fn.quick_push (coef);
1630 return fn;
1631 }
1632
1633 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1634 *OVERLAPS_B are initialized to the functions that describe the
1635 relation between the elements accessed twice by CHREC_A and
1636 CHREC_B. For k >= 0, the following property is verified:
1637
1638 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1639
1640 static void
1641 analyze_ziv_subscript (tree chrec_a,
1642 tree chrec_b,
1643 conflict_function **overlaps_a,
1644 conflict_function **overlaps_b,
1645 tree *last_conflicts)
1646 {
1647 tree type, difference;
1648 dependence_stats.num_ziv++;
1649
1650 if (dump_file && (dump_flags & TDF_DETAILS))
1651 fprintf (dump_file, "(analyze_ziv_subscript \n");
1652
1653 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1654 chrec_a = chrec_convert (type, chrec_a, NULL);
1655 chrec_b = chrec_convert (type, chrec_b, NULL);
1656 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1657
1658 switch (TREE_CODE (difference))
1659 {
1660 case INTEGER_CST:
1661 if (integer_zerop (difference))
1662 {
1663 /* The difference is equal to zero: the accessed index
1664 overlaps for each iteration in the loop. */
1665 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1666 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1667 *last_conflicts = chrec_dont_know;
1668 dependence_stats.num_ziv_dependent++;
1669 }
1670 else
1671 {
1672 /* The accesses do not overlap. */
1673 *overlaps_a = conflict_fn_no_dependence ();
1674 *overlaps_b = conflict_fn_no_dependence ();
1675 *last_conflicts = integer_zero_node;
1676 dependence_stats.num_ziv_independent++;
1677 }
1678 break;
1679
1680 default:
1681 /* We're not sure whether the indexes overlap. For the moment,
1682 conservatively answer "don't know". */
1683 if (dump_file && (dump_flags & TDF_DETAILS))
1684 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1685
1686 *overlaps_a = conflict_fn_not_known ();
1687 *overlaps_b = conflict_fn_not_known ();
1688 *last_conflicts = chrec_dont_know;
1689 dependence_stats.num_ziv_unimplemented++;
1690 break;
1691 }
1692
1693 if (dump_file && (dump_flags & TDF_DETAILS))
1694 fprintf (dump_file, ")\n");
1695 }
1696
1697 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1698 and only if it fits to the int type. If this is not the case, or the
1699 bound on the number of iterations of LOOP could not be derived, returns
1700 chrec_dont_know. */
1701
1702 static tree
1703 max_stmt_executions_tree (struct loop *loop)
1704 {
1705 double_int nit;
1706
1707 if (!max_stmt_executions (loop, &nit))
1708 return chrec_dont_know;
1709
1710 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1711 return chrec_dont_know;
1712
1713 return double_int_to_tree (unsigned_type_node, nit);
1714 }
1715
1716 /* Determine whether the CHREC is always positive/negative. If the expression
1717 cannot be statically analyzed, return false, otherwise set the answer into
1718 VALUE. */
1719
1720 static bool
1721 chrec_is_positive (tree chrec, bool *value)
1722 {
1723 bool value0, value1, value2;
1724 tree end_value, nb_iter;
1725
1726 switch (TREE_CODE (chrec))
1727 {
1728 case POLYNOMIAL_CHREC:
1729 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1730 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1731 return false;
1732
1733 /* FIXME -- overflows. */
1734 if (value0 == value1)
1735 {
1736 *value = value0;
1737 return true;
1738 }
1739
1740 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1741 and the proof consists in showing that the sign never
1742 changes during the execution of the loop, from 0 to
1743 loop->nb_iterations. */
1744 if (!evolution_function_is_affine_p (chrec))
1745 return false;
1746
1747 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1748 if (chrec_contains_undetermined (nb_iter))
1749 return false;
1750
1751 #if 0
1752 /* TODO -- If the test is after the exit, we may decrease the number of
1753 iterations by one. */
1754 if (after_exit)
1755 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1756 #endif
1757
1758 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1759
1760 if (!chrec_is_positive (end_value, &value2))
1761 return false;
1762
1763 *value = value0;
1764 return value0 == value1;
1765
1766 case INTEGER_CST:
1767 switch (tree_int_cst_sgn (chrec))
1768 {
1769 case -1:
1770 *value = false;
1771 break;
1772 case 1:
1773 *value = true;
1774 break;
1775 default:
1776 return false;
1777 }
1778 return true;
1779
1780 default:
1781 return false;
1782 }
1783 }
1784
1785
1786 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1787 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1788 *OVERLAPS_B are initialized to the functions that describe the
1789 relation between the elements accessed twice by CHREC_A and
1790 CHREC_B. For k >= 0, the following property is verified:
1791
1792 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1793
1794 static void
1795 analyze_siv_subscript_cst_affine (tree chrec_a,
1796 tree chrec_b,
1797 conflict_function **overlaps_a,
1798 conflict_function **overlaps_b,
1799 tree *last_conflicts)
1800 {
1801 bool value0, value1, value2;
1802 tree type, difference, tmp;
1803
1804 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1805 chrec_a = chrec_convert (type, chrec_a, NULL);
1806 chrec_b = chrec_convert (type, chrec_b, NULL);
1807 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1808
1809 /* Special case overlap in the first iteration. */
1810 if (integer_zerop (difference))
1811 {
1812 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1813 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1814 *last_conflicts = integer_one_node;
1815 return;
1816 }
1817
1818 if (!chrec_is_positive (initial_condition (difference), &value0))
1819 {
1820 if (dump_file && (dump_flags & TDF_DETAILS))
1821 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1822
1823 dependence_stats.num_siv_unimplemented++;
1824 *overlaps_a = conflict_fn_not_known ();
1825 *overlaps_b = conflict_fn_not_known ();
1826 *last_conflicts = chrec_dont_know;
1827 return;
1828 }
1829 else
1830 {
1831 if (value0 == false)
1832 {
1833 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1834 {
1835 if (dump_file && (dump_flags & TDF_DETAILS))
1836 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1837
1838 *overlaps_a = conflict_fn_not_known ();
1839 *overlaps_b = conflict_fn_not_known ();
1840 *last_conflicts = chrec_dont_know;
1841 dependence_stats.num_siv_unimplemented++;
1842 return;
1843 }
1844 else
1845 {
1846 if (value1 == true)
1847 {
1848 /* Example:
1849 chrec_a = 12
1850 chrec_b = {10, +, 1}
1851 */
1852
1853 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1854 {
1855 HOST_WIDE_INT numiter;
1856 struct loop *loop = get_chrec_loop (chrec_b);
1857
1858 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1859 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1860 fold_build1 (ABS_EXPR, type, difference),
1861 CHREC_RIGHT (chrec_b));
1862 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1863 *last_conflicts = integer_one_node;
1864
1865
1866 /* Perform weak-zero siv test to see if overlap is
1867 outside the loop bounds. */
1868 numiter = max_stmt_executions_int (loop);
1869
1870 if (numiter >= 0
1871 && compare_tree_int (tmp, numiter) > 0)
1872 {
1873 free_conflict_function (*overlaps_a);
1874 free_conflict_function (*overlaps_b);
1875 *overlaps_a = conflict_fn_no_dependence ();
1876 *overlaps_b = conflict_fn_no_dependence ();
1877 *last_conflicts = integer_zero_node;
1878 dependence_stats.num_siv_independent++;
1879 return;
1880 }
1881 dependence_stats.num_siv_dependent++;
1882 return;
1883 }
1884
1885 /* When the step does not divide the difference, there are
1886 no overlaps. */
1887 else
1888 {
1889 *overlaps_a = conflict_fn_no_dependence ();
1890 *overlaps_b = conflict_fn_no_dependence ();
1891 *last_conflicts = integer_zero_node;
1892 dependence_stats.num_siv_independent++;
1893 return;
1894 }
1895 }
1896
1897 else
1898 {
1899 /* Example:
1900 chrec_a = 12
1901 chrec_b = {10, +, -1}
1902
1903 In this case, chrec_a will not overlap with chrec_b. */
1904 *overlaps_a = conflict_fn_no_dependence ();
1905 *overlaps_b = conflict_fn_no_dependence ();
1906 *last_conflicts = integer_zero_node;
1907 dependence_stats.num_siv_independent++;
1908 return;
1909 }
1910 }
1911 }
1912 else
1913 {
1914 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1915 {
1916 if (dump_file && (dump_flags & TDF_DETAILS))
1917 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1918
1919 *overlaps_a = conflict_fn_not_known ();
1920 *overlaps_b = conflict_fn_not_known ();
1921 *last_conflicts = chrec_dont_know;
1922 dependence_stats.num_siv_unimplemented++;
1923 return;
1924 }
1925 else
1926 {
1927 if (value2 == false)
1928 {
1929 /* Example:
1930 chrec_a = 3
1931 chrec_b = {10, +, -1}
1932 */
1933 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1934 {
1935 HOST_WIDE_INT numiter;
1936 struct loop *loop = get_chrec_loop (chrec_b);
1937
1938 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1939 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1940 CHREC_RIGHT (chrec_b));
1941 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1942 *last_conflicts = integer_one_node;
1943
1944 /* Perform weak-zero siv test to see if overlap is
1945 outside the loop bounds. */
1946 numiter = max_stmt_executions_int (loop);
1947
1948 if (numiter >= 0
1949 && compare_tree_int (tmp, numiter) > 0)
1950 {
1951 free_conflict_function (*overlaps_a);
1952 free_conflict_function (*overlaps_b);
1953 *overlaps_a = conflict_fn_no_dependence ();
1954 *overlaps_b = conflict_fn_no_dependence ();
1955 *last_conflicts = integer_zero_node;
1956 dependence_stats.num_siv_independent++;
1957 return;
1958 }
1959 dependence_stats.num_siv_dependent++;
1960 return;
1961 }
1962
1963 /* When the step does not divide the difference, there
1964 are no overlaps. */
1965 else
1966 {
1967 *overlaps_a = conflict_fn_no_dependence ();
1968 *overlaps_b = conflict_fn_no_dependence ();
1969 *last_conflicts = integer_zero_node;
1970 dependence_stats.num_siv_independent++;
1971 return;
1972 }
1973 }
1974 else
1975 {
1976 /* Example:
1977 chrec_a = 3
1978 chrec_b = {4, +, 1}
1979
1980 In this case, chrec_a will not overlap with chrec_b. */
1981 *overlaps_a = conflict_fn_no_dependence ();
1982 *overlaps_b = conflict_fn_no_dependence ();
1983 *last_conflicts = integer_zero_node;
1984 dependence_stats.num_siv_independent++;
1985 return;
1986 }
1987 }
1988 }
1989 }
1990 }
1991
1992 /* Helper recursive function for initializing the matrix A. Returns
1993 the initial value of CHREC. */
1994
1995 static tree
1996 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1997 {
1998 gcc_assert (chrec);
1999
2000 switch (TREE_CODE (chrec))
2001 {
2002 case POLYNOMIAL_CHREC:
2003 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2004
2005 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2006 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2007
2008 case PLUS_EXPR:
2009 case MULT_EXPR:
2010 case MINUS_EXPR:
2011 {
2012 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2013 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2014
2015 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2016 }
2017
2018 case NOP_EXPR:
2019 {
2020 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2021 return chrec_convert (chrec_type (chrec), op, NULL);
2022 }
2023
2024 case BIT_NOT_EXPR:
2025 {
2026 /* Handle ~X as -1 - X. */
2027 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2028 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2029 build_int_cst (TREE_TYPE (chrec), -1), op);
2030 }
2031
2032 case INTEGER_CST:
2033 return chrec;
2034
2035 default:
2036 gcc_unreachable ();
2037 return NULL_TREE;
2038 }
2039 }
2040
2041 #define FLOOR_DIV(x,y) ((x) / (y))
2042
2043 /* Solves the special case of the Diophantine equation:
2044 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2045
2046 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2047 number of iterations that loops X and Y run. The overlaps will be
2048 constructed as evolutions in dimension DIM. */
2049
2050 static void
2051 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2052 affine_fn *overlaps_a,
2053 affine_fn *overlaps_b,
2054 tree *last_conflicts, int dim)
2055 {
2056 if (((step_a > 0 && step_b > 0)
2057 || (step_a < 0 && step_b < 0)))
2058 {
2059 int step_overlaps_a, step_overlaps_b;
2060 int gcd_steps_a_b, last_conflict, tau2;
2061
2062 gcd_steps_a_b = gcd (step_a, step_b);
2063 step_overlaps_a = step_b / gcd_steps_a_b;
2064 step_overlaps_b = step_a / gcd_steps_a_b;
2065
2066 if (niter > 0)
2067 {
2068 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2069 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2070 last_conflict = tau2;
2071 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2072 }
2073 else
2074 *last_conflicts = chrec_dont_know;
2075
2076 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2077 build_int_cst (NULL_TREE,
2078 step_overlaps_a));
2079 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2080 build_int_cst (NULL_TREE,
2081 step_overlaps_b));
2082 }
2083
2084 else
2085 {
2086 *overlaps_a = affine_fn_cst (integer_zero_node);
2087 *overlaps_b = affine_fn_cst (integer_zero_node);
2088 *last_conflicts = integer_zero_node;
2089 }
2090 }
2091
2092 /* Solves the special case of a Diophantine equation where CHREC_A is
2093 an affine bivariate function, and CHREC_B is an affine univariate
2094 function. For example,
2095
2096 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2097
2098 has the following overlapping functions:
2099
2100 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2101 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2102 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2103
2104 FORNOW: This is a specialized implementation for a case occurring in
2105 a common benchmark. Implement the general algorithm. */
2106
2107 static void
2108 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2109 conflict_function **overlaps_a,
2110 conflict_function **overlaps_b,
2111 tree *last_conflicts)
2112 {
2113 bool xz_p, yz_p, xyz_p;
2114 int step_x, step_y, step_z;
2115 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2116 affine_fn overlaps_a_xz, overlaps_b_xz;
2117 affine_fn overlaps_a_yz, overlaps_b_yz;
2118 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2119 affine_fn ova1, ova2, ovb;
2120 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2121
2122 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2123 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2124 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2125
2126 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2127 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2128 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2129
2130 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2131 {
2132 if (dump_file && (dump_flags & TDF_DETAILS))
2133 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2134
2135 *overlaps_a = conflict_fn_not_known ();
2136 *overlaps_b = conflict_fn_not_known ();
2137 *last_conflicts = chrec_dont_know;
2138 return;
2139 }
2140
2141 niter = MIN (niter_x, niter_z);
2142 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2143 &overlaps_a_xz,
2144 &overlaps_b_xz,
2145 &last_conflicts_xz, 1);
2146 niter = MIN (niter_y, niter_z);
2147 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2148 &overlaps_a_yz,
2149 &overlaps_b_yz,
2150 &last_conflicts_yz, 2);
2151 niter = MIN (niter_x, niter_z);
2152 niter = MIN (niter_y, niter);
2153 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2154 &overlaps_a_xyz,
2155 &overlaps_b_xyz,
2156 &last_conflicts_xyz, 3);
2157
2158 xz_p = !integer_zerop (last_conflicts_xz);
2159 yz_p = !integer_zerop (last_conflicts_yz);
2160 xyz_p = !integer_zerop (last_conflicts_xyz);
2161
2162 if (xz_p || yz_p || xyz_p)
2163 {
2164 ova1 = affine_fn_cst (integer_zero_node);
2165 ova2 = affine_fn_cst (integer_zero_node);
2166 ovb = affine_fn_cst (integer_zero_node);
2167 if (xz_p)
2168 {
2169 affine_fn t0 = ova1;
2170 affine_fn t2 = ovb;
2171
2172 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2173 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2174 affine_fn_free (t0);
2175 affine_fn_free (t2);
2176 *last_conflicts = last_conflicts_xz;
2177 }
2178 if (yz_p)
2179 {
2180 affine_fn t0 = ova2;
2181 affine_fn t2 = ovb;
2182
2183 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2184 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2185 affine_fn_free (t0);
2186 affine_fn_free (t2);
2187 *last_conflicts = last_conflicts_yz;
2188 }
2189 if (xyz_p)
2190 {
2191 affine_fn t0 = ova1;
2192 affine_fn t2 = ova2;
2193 affine_fn t4 = ovb;
2194
2195 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2196 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2197 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2198 affine_fn_free (t0);
2199 affine_fn_free (t2);
2200 affine_fn_free (t4);
2201 *last_conflicts = last_conflicts_xyz;
2202 }
2203 *overlaps_a = conflict_fn (2, ova1, ova2);
2204 *overlaps_b = conflict_fn (1, ovb);
2205 }
2206 else
2207 {
2208 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2209 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2210 *last_conflicts = integer_zero_node;
2211 }
2212
2213 affine_fn_free (overlaps_a_xz);
2214 affine_fn_free (overlaps_b_xz);
2215 affine_fn_free (overlaps_a_yz);
2216 affine_fn_free (overlaps_b_yz);
2217 affine_fn_free (overlaps_a_xyz);
2218 affine_fn_free (overlaps_b_xyz);
2219 }
2220
2221 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2222
2223 static void
2224 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2225 int size)
2226 {
2227 memcpy (vec2, vec1, size * sizeof (*vec1));
2228 }
2229
2230 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2231
2232 static void
2233 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2234 int m, int n)
2235 {
2236 int i;
2237
2238 for (i = 0; i < m; i++)
2239 lambda_vector_copy (mat1[i], mat2[i], n);
2240 }
2241
2242 /* Store the N x N identity matrix in MAT. */
2243
2244 static void
2245 lambda_matrix_id (lambda_matrix mat, int size)
2246 {
2247 int i, j;
2248
2249 for (i = 0; i < size; i++)
2250 for (j = 0; j < size; j++)
2251 mat[i][j] = (i == j) ? 1 : 0;
2252 }
2253
2254 /* Return the first nonzero element of vector VEC1 between START and N.
2255 We must have START <= N. Returns N if VEC1 is the zero vector. */
2256
2257 static int
2258 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2259 {
2260 int j = start;
2261 while (j < n && vec1[j] == 0)
2262 j++;
2263 return j;
2264 }
2265
2266 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2267 R2 = R2 + CONST1 * R1. */
2268
2269 static void
2270 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2271 {
2272 int i;
2273
2274 if (const1 == 0)
2275 return;
2276
2277 for (i = 0; i < n; i++)
2278 mat[r2][i] += const1 * mat[r1][i];
2279 }
2280
2281 /* Swap rows R1 and R2 in matrix MAT. */
2282
2283 static void
2284 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2285 {
2286 lambda_vector row;
2287
2288 row = mat[r1];
2289 mat[r1] = mat[r2];
2290 mat[r2] = row;
2291 }
2292
2293 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2294 and store the result in VEC2. */
2295
2296 static void
2297 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2298 int size, int const1)
2299 {
2300 int i;
2301
2302 if (const1 == 0)
2303 lambda_vector_clear (vec2, size);
2304 else
2305 for (i = 0; i < size; i++)
2306 vec2[i] = const1 * vec1[i];
2307 }
2308
2309 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2310
2311 static void
2312 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2313 int size)
2314 {
2315 lambda_vector_mult_const (vec1, vec2, size, -1);
2316 }
2317
2318 /* Negate row R1 of matrix MAT which has N columns. */
2319
2320 static void
2321 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2322 {
2323 lambda_vector_negate (mat[r1], mat[r1], n);
2324 }
2325
2326 /* Return true if two vectors are equal. */
2327
2328 static bool
2329 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2330 {
2331 int i;
2332 for (i = 0; i < size; i++)
2333 if (vec1[i] != vec2[i])
2334 return false;
2335 return true;
2336 }
2337
2338 /* Given an M x N integer matrix A, this function determines an M x
2339 M unimodular matrix U, and an M x N echelon matrix S such that
2340 "U.A = S". This decomposition is also known as "right Hermite".
2341
2342 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2343 Restructuring Compilers" Utpal Banerjee. */
2344
2345 static void
2346 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2347 lambda_matrix S, lambda_matrix U)
2348 {
2349 int i, j, i0 = 0;
2350
2351 lambda_matrix_copy (A, S, m, n);
2352 lambda_matrix_id (U, m);
2353
2354 for (j = 0; j < n; j++)
2355 {
2356 if (lambda_vector_first_nz (S[j], m, i0) < m)
2357 {
2358 ++i0;
2359 for (i = m - 1; i >= i0; i--)
2360 {
2361 while (S[i][j] != 0)
2362 {
2363 int sigma, factor, a, b;
2364
2365 a = S[i-1][j];
2366 b = S[i][j];
2367 sigma = (a * b < 0) ? -1: 1;
2368 a = abs (a);
2369 b = abs (b);
2370 factor = sigma * (a / b);
2371
2372 lambda_matrix_row_add (S, n, i, i-1, -factor);
2373 lambda_matrix_row_exchange (S, i, i-1);
2374
2375 lambda_matrix_row_add (U, m, i, i-1, -factor);
2376 lambda_matrix_row_exchange (U, i, i-1);
2377 }
2378 }
2379 }
2380 }
2381 }
2382
2383 /* Determines the overlapping elements due to accesses CHREC_A and
2384 CHREC_B, that are affine functions. This function cannot handle
2385 symbolic evolution functions, ie. when initial conditions are
2386 parameters, because it uses lambda matrices of integers. */
2387
2388 static void
2389 analyze_subscript_affine_affine (tree chrec_a,
2390 tree chrec_b,
2391 conflict_function **overlaps_a,
2392 conflict_function **overlaps_b,
2393 tree *last_conflicts)
2394 {
2395 unsigned nb_vars_a, nb_vars_b, dim;
2396 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2397 lambda_matrix A, U, S;
2398 struct obstack scratch_obstack;
2399
2400 if (eq_evolutions_p (chrec_a, chrec_b))
2401 {
2402 /* The accessed index overlaps for each iteration in the
2403 loop. */
2404 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2405 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2406 *last_conflicts = chrec_dont_know;
2407 return;
2408 }
2409 if (dump_file && (dump_flags & TDF_DETAILS))
2410 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2411
2412 /* For determining the initial intersection, we have to solve a
2413 Diophantine equation. This is the most time consuming part.
2414
2415 For answering to the question: "Is there a dependence?" we have
2416 to prove that there exists a solution to the Diophantine
2417 equation, and that the solution is in the iteration domain,
2418 i.e. the solution is positive or zero, and that the solution
2419 happens before the upper bound loop.nb_iterations. Otherwise
2420 there is no dependence. This function outputs a description of
2421 the iterations that hold the intersections. */
2422
2423 nb_vars_a = nb_vars_in_chrec (chrec_a);
2424 nb_vars_b = nb_vars_in_chrec (chrec_b);
2425
2426 gcc_obstack_init (&scratch_obstack);
2427
2428 dim = nb_vars_a + nb_vars_b;
2429 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2430 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2431 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2432
2433 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2434 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2435 gamma = init_b - init_a;
2436
2437 /* Don't do all the hard work of solving the Diophantine equation
2438 when we already know the solution: for example,
2439 | {3, +, 1}_1
2440 | {3, +, 4}_2
2441 | gamma = 3 - 3 = 0.
2442 Then the first overlap occurs during the first iterations:
2443 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2444 */
2445 if (gamma == 0)
2446 {
2447 if (nb_vars_a == 1 && nb_vars_b == 1)
2448 {
2449 HOST_WIDE_INT step_a, step_b;
2450 HOST_WIDE_INT niter, niter_a, niter_b;
2451 affine_fn ova, ovb;
2452
2453 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2454 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2455 niter = MIN (niter_a, niter_b);
2456 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2457 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2458
2459 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2460 &ova, &ovb,
2461 last_conflicts, 1);
2462 *overlaps_a = conflict_fn (1, ova);
2463 *overlaps_b = conflict_fn (1, ovb);
2464 }
2465
2466 else if (nb_vars_a == 2 && nb_vars_b == 1)
2467 compute_overlap_steps_for_affine_1_2
2468 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2469
2470 else if (nb_vars_a == 1 && nb_vars_b == 2)
2471 compute_overlap_steps_for_affine_1_2
2472 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2473
2474 else
2475 {
2476 if (dump_file && (dump_flags & TDF_DETAILS))
2477 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2478 *overlaps_a = conflict_fn_not_known ();
2479 *overlaps_b = conflict_fn_not_known ();
2480 *last_conflicts = chrec_dont_know;
2481 }
2482 goto end_analyze_subs_aa;
2483 }
2484
2485 /* U.A = S */
2486 lambda_matrix_right_hermite (A, dim, 1, S, U);
2487
2488 if (S[0][0] < 0)
2489 {
2490 S[0][0] *= -1;
2491 lambda_matrix_row_negate (U, dim, 0);
2492 }
2493 gcd_alpha_beta = S[0][0];
2494
2495 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2496 but that is a quite strange case. Instead of ICEing, answer
2497 don't know. */
2498 if (gcd_alpha_beta == 0)
2499 {
2500 *overlaps_a = conflict_fn_not_known ();
2501 *overlaps_b = conflict_fn_not_known ();
2502 *last_conflicts = chrec_dont_know;
2503 goto end_analyze_subs_aa;
2504 }
2505
2506 /* The classic "gcd-test". */
2507 if (!int_divides_p (gcd_alpha_beta, gamma))
2508 {
2509 /* The "gcd-test" has determined that there is no integer
2510 solution, i.e. there is no dependence. */
2511 *overlaps_a = conflict_fn_no_dependence ();
2512 *overlaps_b = conflict_fn_no_dependence ();
2513 *last_conflicts = integer_zero_node;
2514 }
2515
2516 /* Both access functions are univariate. This includes SIV and MIV cases. */
2517 else if (nb_vars_a == 1 && nb_vars_b == 1)
2518 {
2519 /* Both functions should have the same evolution sign. */
2520 if (((A[0][0] > 0 && -A[1][0] > 0)
2521 || (A[0][0] < 0 && -A[1][0] < 0)))
2522 {
2523 /* The solutions are given by:
2524 |
2525 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2526 | [u21 u22] [y0]
2527
2528 For a given integer t. Using the following variables,
2529
2530 | i0 = u11 * gamma / gcd_alpha_beta
2531 | j0 = u12 * gamma / gcd_alpha_beta
2532 | i1 = u21
2533 | j1 = u22
2534
2535 the solutions are:
2536
2537 | x0 = i0 + i1 * t,
2538 | y0 = j0 + j1 * t. */
2539 HOST_WIDE_INT i0, j0, i1, j1;
2540
2541 i0 = U[0][0] * gamma / gcd_alpha_beta;
2542 j0 = U[0][1] * gamma / gcd_alpha_beta;
2543 i1 = U[1][0];
2544 j1 = U[1][1];
2545
2546 if ((i1 == 0 && i0 < 0)
2547 || (j1 == 0 && j0 < 0))
2548 {
2549 /* There is no solution.
2550 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2551 falls in here, but for the moment we don't look at the
2552 upper bound of the iteration domain. */
2553 *overlaps_a = conflict_fn_no_dependence ();
2554 *overlaps_b = conflict_fn_no_dependence ();
2555 *last_conflicts = integer_zero_node;
2556 goto end_analyze_subs_aa;
2557 }
2558
2559 if (i1 > 0 && j1 > 0)
2560 {
2561 HOST_WIDE_INT niter_a
2562 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2563 HOST_WIDE_INT niter_b
2564 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2565 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2566
2567 /* (X0, Y0) is a solution of the Diophantine equation:
2568 "chrec_a (X0) = chrec_b (Y0)". */
2569 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2570 CEIL (-j0, j1));
2571 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2572 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2573
2574 /* (X1, Y1) is the smallest positive solution of the eq
2575 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2576 first conflict occurs. */
2577 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2578 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2579 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2580
2581 if (niter > 0)
2582 {
2583 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2584 FLOOR_DIV (niter - j0, j1));
2585 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2586
2587 /* If the overlap occurs outside of the bounds of the
2588 loop, there is no dependence. */
2589 if (x1 >= niter || y1 >= niter)
2590 {
2591 *overlaps_a = conflict_fn_no_dependence ();
2592 *overlaps_b = conflict_fn_no_dependence ();
2593 *last_conflicts = integer_zero_node;
2594 goto end_analyze_subs_aa;
2595 }
2596 else
2597 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2598 }
2599 else
2600 *last_conflicts = chrec_dont_know;
2601
2602 *overlaps_a
2603 = conflict_fn (1,
2604 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2605 1,
2606 build_int_cst (NULL_TREE, i1)));
2607 *overlaps_b
2608 = conflict_fn (1,
2609 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2610 1,
2611 build_int_cst (NULL_TREE, j1)));
2612 }
2613 else
2614 {
2615 /* FIXME: For the moment, the upper bound of the
2616 iteration domain for i and j is not checked. */
2617 if (dump_file && (dump_flags & TDF_DETAILS))
2618 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2619 *overlaps_a = conflict_fn_not_known ();
2620 *overlaps_b = conflict_fn_not_known ();
2621 *last_conflicts = chrec_dont_know;
2622 }
2623 }
2624 else
2625 {
2626 if (dump_file && (dump_flags & TDF_DETAILS))
2627 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2628 *overlaps_a = conflict_fn_not_known ();
2629 *overlaps_b = conflict_fn_not_known ();
2630 *last_conflicts = chrec_dont_know;
2631 }
2632 }
2633 else
2634 {
2635 if (dump_file && (dump_flags & TDF_DETAILS))
2636 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2637 *overlaps_a = conflict_fn_not_known ();
2638 *overlaps_b = conflict_fn_not_known ();
2639 *last_conflicts = chrec_dont_know;
2640 }
2641
2642 end_analyze_subs_aa:
2643 obstack_free (&scratch_obstack, NULL);
2644 if (dump_file && (dump_flags & TDF_DETAILS))
2645 {
2646 fprintf (dump_file, " (overlaps_a = ");
2647 dump_conflict_function (dump_file, *overlaps_a);
2648 fprintf (dump_file, ")\n (overlaps_b = ");
2649 dump_conflict_function (dump_file, *overlaps_b);
2650 fprintf (dump_file, ")\n");
2651 fprintf (dump_file, ")\n");
2652 }
2653 }
2654
2655 /* Returns true when analyze_subscript_affine_affine can be used for
2656 determining the dependence relation between chrec_a and chrec_b,
2657 that contain symbols. This function modifies chrec_a and chrec_b
2658 such that the analysis result is the same, and such that they don't
2659 contain symbols, and then can safely be passed to the analyzer.
2660
2661 Example: The analysis of the following tuples of evolutions produce
2662 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2663 vs. {0, +, 1}_1
2664
2665 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2666 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2667 */
2668
2669 static bool
2670 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2671 {
2672 tree diff, type, left_a, left_b, right_b;
2673
2674 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2675 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2676 /* FIXME: For the moment not handled. Might be refined later. */
2677 return false;
2678
2679 type = chrec_type (*chrec_a);
2680 left_a = CHREC_LEFT (*chrec_a);
2681 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2682 diff = chrec_fold_minus (type, left_a, left_b);
2683
2684 if (!evolution_function_is_constant_p (diff))
2685 return false;
2686
2687 if (dump_file && (dump_flags & TDF_DETAILS))
2688 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2689
2690 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2691 diff, CHREC_RIGHT (*chrec_a));
2692 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2693 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2694 build_int_cst (type, 0),
2695 right_b);
2696 return true;
2697 }
2698
2699 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2700 *OVERLAPS_B are initialized to the functions that describe the
2701 relation between the elements accessed twice by CHREC_A and
2702 CHREC_B. For k >= 0, the following property is verified:
2703
2704 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2705
2706 static void
2707 analyze_siv_subscript (tree chrec_a,
2708 tree chrec_b,
2709 conflict_function **overlaps_a,
2710 conflict_function **overlaps_b,
2711 tree *last_conflicts,
2712 int loop_nest_num)
2713 {
2714 dependence_stats.num_siv++;
2715
2716 if (dump_file && (dump_flags & TDF_DETAILS))
2717 fprintf (dump_file, "(analyze_siv_subscript \n");
2718
2719 if (evolution_function_is_constant_p (chrec_a)
2720 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2721 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2722 overlaps_a, overlaps_b, last_conflicts);
2723
2724 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2725 && evolution_function_is_constant_p (chrec_b))
2726 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2727 overlaps_b, overlaps_a, last_conflicts);
2728
2729 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2730 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2731 {
2732 if (!chrec_contains_symbols (chrec_a)
2733 && !chrec_contains_symbols (chrec_b))
2734 {
2735 analyze_subscript_affine_affine (chrec_a, chrec_b,
2736 overlaps_a, overlaps_b,
2737 last_conflicts);
2738
2739 if (CF_NOT_KNOWN_P (*overlaps_a)
2740 || CF_NOT_KNOWN_P (*overlaps_b))
2741 dependence_stats.num_siv_unimplemented++;
2742 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2743 || CF_NO_DEPENDENCE_P (*overlaps_b))
2744 dependence_stats.num_siv_independent++;
2745 else
2746 dependence_stats.num_siv_dependent++;
2747 }
2748 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2749 &chrec_b))
2750 {
2751 analyze_subscript_affine_affine (chrec_a, chrec_b,
2752 overlaps_a, overlaps_b,
2753 last_conflicts);
2754
2755 if (CF_NOT_KNOWN_P (*overlaps_a)
2756 || CF_NOT_KNOWN_P (*overlaps_b))
2757 dependence_stats.num_siv_unimplemented++;
2758 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2759 || CF_NO_DEPENDENCE_P (*overlaps_b))
2760 dependence_stats.num_siv_independent++;
2761 else
2762 dependence_stats.num_siv_dependent++;
2763 }
2764 else
2765 goto siv_subscript_dontknow;
2766 }
2767
2768 else
2769 {
2770 siv_subscript_dontknow:;
2771 if (dump_file && (dump_flags & TDF_DETAILS))
2772 fprintf (dump_file, "siv test failed: unimplemented.\n");
2773 *overlaps_a = conflict_fn_not_known ();
2774 *overlaps_b = conflict_fn_not_known ();
2775 *last_conflicts = chrec_dont_know;
2776 dependence_stats.num_siv_unimplemented++;
2777 }
2778
2779 if (dump_file && (dump_flags & TDF_DETAILS))
2780 fprintf (dump_file, ")\n");
2781 }
2782
2783 /* Returns false if we can prove that the greatest common divisor of the steps
2784 of CHREC does not divide CST, false otherwise. */
2785
2786 static bool
2787 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2788 {
2789 HOST_WIDE_INT cd = 0, val;
2790 tree step;
2791
2792 if (!host_integerp (cst, 0))
2793 return true;
2794 val = tree_low_cst (cst, 0);
2795
2796 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2797 {
2798 step = CHREC_RIGHT (chrec);
2799 if (!host_integerp (step, 0))
2800 return true;
2801 cd = gcd (cd, tree_low_cst (step, 0));
2802 chrec = CHREC_LEFT (chrec);
2803 }
2804
2805 return val % cd == 0;
2806 }
2807
2808 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2809 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2810 functions that describe the relation between the elements accessed
2811 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2812 is verified:
2813
2814 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2815
2816 static void
2817 analyze_miv_subscript (tree chrec_a,
2818 tree chrec_b,
2819 conflict_function **overlaps_a,
2820 conflict_function **overlaps_b,
2821 tree *last_conflicts,
2822 struct loop *loop_nest)
2823 {
2824 tree type, difference;
2825
2826 dependence_stats.num_miv++;
2827 if (dump_file && (dump_flags & TDF_DETAILS))
2828 fprintf (dump_file, "(analyze_miv_subscript \n");
2829
2830 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2831 chrec_a = chrec_convert (type, chrec_a, NULL);
2832 chrec_b = chrec_convert (type, chrec_b, NULL);
2833 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2834
2835 if (eq_evolutions_p (chrec_a, chrec_b))
2836 {
2837 /* Access functions are the same: all the elements are accessed
2838 in the same order. */
2839 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2840 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2841 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2842 dependence_stats.num_miv_dependent++;
2843 }
2844
2845 else if (evolution_function_is_constant_p (difference)
2846 /* For the moment, the following is verified:
2847 evolution_function_is_affine_multivariate_p (chrec_a,
2848 loop_nest->num) */
2849 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2850 {
2851 /* testsuite/.../ssa-chrec-33.c
2852 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2853
2854 The difference is 1, and all the evolution steps are multiples
2855 of 2, consequently there are no overlapping elements. */
2856 *overlaps_a = conflict_fn_no_dependence ();
2857 *overlaps_b = conflict_fn_no_dependence ();
2858 *last_conflicts = integer_zero_node;
2859 dependence_stats.num_miv_independent++;
2860 }
2861
2862 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2863 && !chrec_contains_symbols (chrec_a)
2864 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2865 && !chrec_contains_symbols (chrec_b))
2866 {
2867 /* testsuite/.../ssa-chrec-35.c
2868 {0, +, 1}_2 vs. {0, +, 1}_3
2869 the overlapping elements are respectively located at iterations:
2870 {0, +, 1}_x and {0, +, 1}_x,
2871 in other words, we have the equality:
2872 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2873
2874 Other examples:
2875 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2876 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2877
2878 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2879 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2880 */
2881 analyze_subscript_affine_affine (chrec_a, chrec_b,
2882 overlaps_a, overlaps_b, last_conflicts);
2883
2884 if (CF_NOT_KNOWN_P (*overlaps_a)
2885 || CF_NOT_KNOWN_P (*overlaps_b))
2886 dependence_stats.num_miv_unimplemented++;
2887 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2888 || CF_NO_DEPENDENCE_P (*overlaps_b))
2889 dependence_stats.num_miv_independent++;
2890 else
2891 dependence_stats.num_miv_dependent++;
2892 }
2893
2894 else
2895 {
2896 /* When the analysis is too difficult, answer "don't know". */
2897 if (dump_file && (dump_flags & TDF_DETAILS))
2898 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2899
2900 *overlaps_a = conflict_fn_not_known ();
2901 *overlaps_b = conflict_fn_not_known ();
2902 *last_conflicts = chrec_dont_know;
2903 dependence_stats.num_miv_unimplemented++;
2904 }
2905
2906 if (dump_file && (dump_flags & TDF_DETAILS))
2907 fprintf (dump_file, ")\n");
2908 }
2909
2910 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2911 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2912 OVERLAP_ITERATIONS_B are initialized with two functions that
2913 describe the iterations that contain conflicting elements.
2914
2915 Remark: For an integer k >= 0, the following equality is true:
2916
2917 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2918 */
2919
2920 static void
2921 analyze_overlapping_iterations (tree chrec_a,
2922 tree chrec_b,
2923 conflict_function **overlap_iterations_a,
2924 conflict_function **overlap_iterations_b,
2925 tree *last_conflicts, struct loop *loop_nest)
2926 {
2927 unsigned int lnn = loop_nest->num;
2928
2929 dependence_stats.num_subscript_tests++;
2930
2931 if (dump_file && (dump_flags & TDF_DETAILS))
2932 {
2933 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2934 fprintf (dump_file, " (chrec_a = ");
2935 print_generic_expr (dump_file, chrec_a, 0);
2936 fprintf (dump_file, ")\n (chrec_b = ");
2937 print_generic_expr (dump_file, chrec_b, 0);
2938 fprintf (dump_file, ")\n");
2939 }
2940
2941 if (chrec_a == NULL_TREE
2942 || chrec_b == NULL_TREE
2943 || chrec_contains_undetermined (chrec_a)
2944 || chrec_contains_undetermined (chrec_b))
2945 {
2946 dependence_stats.num_subscript_undetermined++;
2947
2948 *overlap_iterations_a = conflict_fn_not_known ();
2949 *overlap_iterations_b = conflict_fn_not_known ();
2950 }
2951
2952 /* If they are the same chrec, and are affine, they overlap
2953 on every iteration. */
2954 else if (eq_evolutions_p (chrec_a, chrec_b)
2955 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2956 || operand_equal_p (chrec_a, chrec_b, 0)))
2957 {
2958 dependence_stats.num_same_subscript_function++;
2959 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2960 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2961 *last_conflicts = chrec_dont_know;
2962 }
2963
2964 /* If they aren't the same, and aren't affine, we can't do anything
2965 yet. */
2966 else if ((chrec_contains_symbols (chrec_a)
2967 || chrec_contains_symbols (chrec_b))
2968 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2969 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2970 {
2971 dependence_stats.num_subscript_undetermined++;
2972 *overlap_iterations_a = conflict_fn_not_known ();
2973 *overlap_iterations_b = conflict_fn_not_known ();
2974 }
2975
2976 else if (ziv_subscript_p (chrec_a, chrec_b))
2977 analyze_ziv_subscript (chrec_a, chrec_b,
2978 overlap_iterations_a, overlap_iterations_b,
2979 last_conflicts);
2980
2981 else if (siv_subscript_p (chrec_a, chrec_b))
2982 analyze_siv_subscript (chrec_a, chrec_b,
2983 overlap_iterations_a, overlap_iterations_b,
2984 last_conflicts, lnn);
2985
2986 else
2987 analyze_miv_subscript (chrec_a, chrec_b,
2988 overlap_iterations_a, overlap_iterations_b,
2989 last_conflicts, loop_nest);
2990
2991 if (dump_file && (dump_flags & TDF_DETAILS))
2992 {
2993 fprintf (dump_file, " (overlap_iterations_a = ");
2994 dump_conflict_function (dump_file, *overlap_iterations_a);
2995 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2996 dump_conflict_function (dump_file, *overlap_iterations_b);
2997 fprintf (dump_file, ")\n");
2998 fprintf (dump_file, ")\n");
2999 }
3000 }
3001
3002 /* Helper function for uniquely inserting distance vectors. */
3003
3004 static void
3005 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3006 {
3007 unsigned i;
3008 lambda_vector v;
3009
3010 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3011 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3012 return;
3013
3014 DDR_DIST_VECTS (ddr).safe_push (dist_v);
3015 }
3016
3017 /* Helper function for uniquely inserting direction vectors. */
3018
3019 static void
3020 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3021 {
3022 unsigned i;
3023 lambda_vector v;
3024
3025 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3026 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3027 return;
3028
3029 DDR_DIR_VECTS (ddr).safe_push (dir_v);
3030 }
3031
3032 /* Add a distance of 1 on all the loops outer than INDEX. If we
3033 haven't yet determined a distance for this outer loop, push a new
3034 distance vector composed of the previous distance, and a distance
3035 of 1 for this outer loop. Example:
3036
3037 | loop_1
3038 | loop_2
3039 | A[10]
3040 | endloop_2
3041 | endloop_1
3042
3043 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3044 save (0, 1), then we have to save (1, 0). */
3045
3046 static void
3047 add_outer_distances (struct data_dependence_relation *ddr,
3048 lambda_vector dist_v, int index)
3049 {
3050 /* For each outer loop where init_v is not set, the accesses are
3051 in dependence of distance 1 in the loop. */
3052 while (--index >= 0)
3053 {
3054 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3055 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3056 save_v[index] = 1;
3057 save_dist_v (ddr, save_v);
3058 }
3059 }
3060
3061 /* Return false when fail to represent the data dependence as a
3062 distance vector. INIT_B is set to true when a component has been
3063 added to the distance vector DIST_V. INDEX_CARRY is then set to
3064 the index in DIST_V that carries the dependence. */
3065
3066 static bool
3067 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3068 struct data_reference *ddr_a,
3069 struct data_reference *ddr_b,
3070 lambda_vector dist_v, bool *init_b,
3071 int *index_carry)
3072 {
3073 unsigned i;
3074 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3075
3076 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3077 {
3078 tree access_fn_a, access_fn_b;
3079 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3080
3081 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3082 {
3083 non_affine_dependence_relation (ddr);
3084 return false;
3085 }
3086
3087 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3088 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3089
3090 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3091 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3092 {
3093 int dist, index;
3094 int var_a = CHREC_VARIABLE (access_fn_a);
3095 int var_b = CHREC_VARIABLE (access_fn_b);
3096
3097 if (var_a != var_b
3098 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3099 {
3100 non_affine_dependence_relation (ddr);
3101 return false;
3102 }
3103
3104 dist = int_cst_value (SUB_DISTANCE (subscript));
3105 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3106 *index_carry = MIN (index, *index_carry);
3107
3108 /* This is the subscript coupling test. If we have already
3109 recorded a distance for this loop (a distance coming from
3110 another subscript), it should be the same. For example,
3111 in the following code, there is no dependence:
3112
3113 | loop i = 0, N, 1
3114 | T[i+1][i] = ...
3115 | ... = T[i][i]
3116 | endloop
3117 */
3118 if (init_v[index] != 0 && dist_v[index] != dist)
3119 {
3120 finalize_ddr_dependent (ddr, chrec_known);
3121 return false;
3122 }
3123
3124 dist_v[index] = dist;
3125 init_v[index] = 1;
3126 *init_b = true;
3127 }
3128 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3129 {
3130 /* This can be for example an affine vs. constant dependence
3131 (T[i] vs. T[3]) that is not an affine dependence and is
3132 not representable as a distance vector. */
3133 non_affine_dependence_relation (ddr);
3134 return false;
3135 }
3136 }
3137
3138 return true;
3139 }
3140
3141 /* Return true when the DDR contains only constant access functions. */
3142
3143 static bool
3144 constant_access_functions (const struct data_dependence_relation *ddr)
3145 {
3146 unsigned i;
3147
3148 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3149 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3150 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3151 return false;
3152
3153 return true;
3154 }
3155
3156 /* Helper function for the case where DDR_A and DDR_B are the same
3157 multivariate access function with a constant step. For an example
3158 see pr34635-1.c. */
3159
3160 static void
3161 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3162 {
3163 int x_1, x_2;
3164 tree c_1 = CHREC_LEFT (c_2);
3165 tree c_0 = CHREC_LEFT (c_1);
3166 lambda_vector dist_v;
3167 int v1, v2, cd;
3168
3169 /* Polynomials with more than 2 variables are not handled yet. When
3170 the evolution steps are parameters, it is not possible to
3171 represent the dependence using classical distance vectors. */
3172 if (TREE_CODE (c_0) != INTEGER_CST
3173 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3174 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3175 {
3176 DDR_AFFINE_P (ddr) = false;
3177 return;
3178 }
3179
3180 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3181 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3182
3183 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3184 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3185 v1 = int_cst_value (CHREC_RIGHT (c_1));
3186 v2 = int_cst_value (CHREC_RIGHT (c_2));
3187 cd = gcd (v1, v2);
3188 v1 /= cd;
3189 v2 /= cd;
3190
3191 if (v2 < 0)
3192 {
3193 v2 = -v2;
3194 v1 = -v1;
3195 }
3196
3197 dist_v[x_1] = v2;
3198 dist_v[x_2] = -v1;
3199 save_dist_v (ddr, dist_v);
3200
3201 add_outer_distances (ddr, dist_v, x_1);
3202 }
3203
3204 /* Helper function for the case where DDR_A and DDR_B are the same
3205 access functions. */
3206
3207 static void
3208 add_other_self_distances (struct data_dependence_relation *ddr)
3209 {
3210 lambda_vector dist_v;
3211 unsigned i;
3212 int index_carry = DDR_NB_LOOPS (ddr);
3213
3214 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3215 {
3216 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3217
3218 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3219 {
3220 if (!evolution_function_is_univariate_p (access_fun))
3221 {
3222 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3223 {
3224 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3225 return;
3226 }
3227
3228 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3229
3230 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3231 add_multivariate_self_dist (ddr, access_fun);
3232 else
3233 /* The evolution step is not constant: it varies in
3234 the outer loop, so this cannot be represented by a
3235 distance vector. For example in pr34635.c the
3236 evolution is {0, +, {0, +, 4}_1}_2. */
3237 DDR_AFFINE_P (ddr) = false;
3238
3239 return;
3240 }
3241
3242 index_carry = MIN (index_carry,
3243 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3244 DDR_LOOP_NEST (ddr)));
3245 }
3246 }
3247
3248 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3249 add_outer_distances (ddr, dist_v, index_carry);
3250 }
3251
3252 static void
3253 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3254 {
3255 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3256
3257 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3258 save_dist_v (ddr, dist_v);
3259 }
3260
3261 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3262 is the case for example when access functions are the same and
3263 equal to a constant, as in:
3264
3265 | loop_1
3266 | A[3] = ...
3267 | ... = A[3]
3268 | endloop_1
3269
3270 in which case the distance vectors are (0) and (1). */
3271
3272 static void
3273 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3274 {
3275 unsigned i, j;
3276
3277 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3278 {
3279 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3280 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3281 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3282
3283 for (j = 0; j < ca->n; j++)
3284 if (affine_function_zero_p (ca->fns[j]))
3285 {
3286 insert_innermost_unit_dist_vector (ddr);
3287 return;
3288 }
3289
3290 for (j = 0; j < cb->n; j++)
3291 if (affine_function_zero_p (cb->fns[j]))
3292 {
3293 insert_innermost_unit_dist_vector (ddr);
3294 return;
3295 }
3296 }
3297 }
3298
3299 /* Compute the classic per loop distance vector. DDR is the data
3300 dependence relation to build a vector from. Return false when fail
3301 to represent the data dependence as a distance vector. */
3302
3303 static bool
3304 build_classic_dist_vector (struct data_dependence_relation *ddr,
3305 struct loop *loop_nest)
3306 {
3307 bool init_b = false;
3308 int index_carry = DDR_NB_LOOPS (ddr);
3309 lambda_vector dist_v;
3310
3311 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3312 return false;
3313
3314 if (same_access_functions (ddr))
3315 {
3316 /* Save the 0 vector. */
3317 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3318 save_dist_v (ddr, dist_v);
3319
3320 if (constant_access_functions (ddr))
3321 add_distance_for_zero_overlaps (ddr);
3322
3323 if (DDR_NB_LOOPS (ddr) > 1)
3324 add_other_self_distances (ddr);
3325
3326 return true;
3327 }
3328
3329 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3330 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3331 dist_v, &init_b, &index_carry))
3332 return false;
3333
3334 /* Save the distance vector if we initialized one. */
3335 if (init_b)
3336 {
3337 /* Verify a basic constraint: classic distance vectors should
3338 always be lexicographically positive.
3339
3340 Data references are collected in the order of execution of
3341 the program, thus for the following loop
3342
3343 | for (i = 1; i < 100; i++)
3344 | for (j = 1; j < 100; j++)
3345 | {
3346 | t = T[j+1][i-1]; // A
3347 | T[j][i] = t + 2; // B
3348 | }
3349
3350 references are collected following the direction of the wind:
3351 A then B. The data dependence tests are performed also
3352 following this order, such that we're looking at the distance
3353 separating the elements accessed by A from the elements later
3354 accessed by B. But in this example, the distance returned by
3355 test_dep (A, B) is lexicographically negative (-1, 1), that
3356 means that the access A occurs later than B with respect to
3357 the outer loop, ie. we're actually looking upwind. In this
3358 case we solve test_dep (B, A) looking downwind to the
3359 lexicographically positive solution, that returns the
3360 distance vector (1, -1). */
3361 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3362 {
3363 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3364 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3365 loop_nest))
3366 return false;
3367 compute_subscript_distance (ddr);
3368 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3369 save_v, &init_b, &index_carry))
3370 return false;
3371 save_dist_v (ddr, save_v);
3372 DDR_REVERSED_P (ddr) = true;
3373
3374 /* In this case there is a dependence forward for all the
3375 outer loops:
3376
3377 | for (k = 1; k < 100; k++)
3378 | for (i = 1; i < 100; i++)
3379 | for (j = 1; j < 100; j++)
3380 | {
3381 | t = T[j+1][i-1]; // A
3382 | T[j][i] = t + 2; // B
3383 | }
3384
3385 the vectors are:
3386 (0, 1, -1)
3387 (1, 1, -1)
3388 (1, -1, 1)
3389 */
3390 if (DDR_NB_LOOPS (ddr) > 1)
3391 {
3392 add_outer_distances (ddr, save_v, index_carry);
3393 add_outer_distances (ddr, dist_v, index_carry);
3394 }
3395 }
3396 else
3397 {
3398 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3399 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3400
3401 if (DDR_NB_LOOPS (ddr) > 1)
3402 {
3403 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3404
3405 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3406 DDR_A (ddr), loop_nest))
3407 return false;
3408 compute_subscript_distance (ddr);
3409 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3410 opposite_v, &init_b,
3411 &index_carry))
3412 return false;
3413
3414 save_dist_v (ddr, save_v);
3415 add_outer_distances (ddr, dist_v, index_carry);
3416 add_outer_distances (ddr, opposite_v, index_carry);
3417 }
3418 else
3419 save_dist_v (ddr, save_v);
3420 }
3421 }
3422 else
3423 {
3424 /* There is a distance of 1 on all the outer loops: Example:
3425 there is a dependence of distance 1 on loop_1 for the array A.
3426
3427 | loop_1
3428 | A[5] = ...
3429 | endloop
3430 */
3431 add_outer_distances (ddr, dist_v,
3432 lambda_vector_first_nz (dist_v,
3433 DDR_NB_LOOPS (ddr), 0));
3434 }
3435
3436 if (dump_file && (dump_flags & TDF_DETAILS))
3437 {
3438 unsigned i;
3439
3440 fprintf (dump_file, "(build_classic_dist_vector\n");
3441 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3442 {
3443 fprintf (dump_file, " dist_vector = (");
3444 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3445 DDR_NB_LOOPS (ddr));
3446 fprintf (dump_file, " )\n");
3447 }
3448 fprintf (dump_file, ")\n");
3449 }
3450
3451 return true;
3452 }
3453
3454 /* Return the direction for a given distance.
3455 FIXME: Computing dir this way is suboptimal, since dir can catch
3456 cases that dist is unable to represent. */
3457
3458 static inline enum data_dependence_direction
3459 dir_from_dist (int dist)
3460 {
3461 if (dist > 0)
3462 return dir_positive;
3463 else if (dist < 0)
3464 return dir_negative;
3465 else
3466 return dir_equal;
3467 }
3468
3469 /* Compute the classic per loop direction vector. DDR is the data
3470 dependence relation to build a vector from. */
3471
3472 static void
3473 build_classic_dir_vector (struct data_dependence_relation *ddr)
3474 {
3475 unsigned i, j;
3476 lambda_vector dist_v;
3477
3478 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3479 {
3480 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3481
3482 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3483 dir_v[j] = dir_from_dist (dist_v[j]);
3484
3485 save_dir_v (ddr, dir_v);
3486 }
3487 }
3488
3489 /* Helper function. Returns true when there is a dependence between
3490 data references DRA and DRB. */
3491
3492 static bool
3493 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3494 struct data_reference *dra,
3495 struct data_reference *drb,
3496 struct loop *loop_nest)
3497 {
3498 unsigned int i;
3499 tree last_conflicts;
3500 struct subscript *subscript;
3501 tree res = NULL_TREE;
3502
3503 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
3504 {
3505 conflict_function *overlaps_a, *overlaps_b;
3506
3507 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3508 DR_ACCESS_FN (drb, i),
3509 &overlaps_a, &overlaps_b,
3510 &last_conflicts, loop_nest);
3511
3512 if (SUB_CONFLICTS_IN_A (subscript))
3513 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3514 if (SUB_CONFLICTS_IN_B (subscript))
3515 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3516
3517 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3518 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3519 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3520
3521 /* If there is any undetermined conflict function we have to
3522 give a conservative answer in case we cannot prove that
3523 no dependence exists when analyzing another subscript. */
3524 if (CF_NOT_KNOWN_P (overlaps_a)
3525 || CF_NOT_KNOWN_P (overlaps_b))
3526 {
3527 res = chrec_dont_know;
3528 continue;
3529 }
3530
3531 /* When there is a subscript with no dependence we can stop. */
3532 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3533 || CF_NO_DEPENDENCE_P (overlaps_b))
3534 {
3535 res = chrec_known;
3536 break;
3537 }
3538 }
3539
3540 if (res == NULL_TREE)
3541 return true;
3542
3543 if (res == chrec_known)
3544 dependence_stats.num_dependence_independent++;
3545 else
3546 dependence_stats.num_dependence_undetermined++;
3547 finalize_ddr_dependent (ddr, res);
3548 return false;
3549 }
3550
3551 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3552
3553 static void
3554 subscript_dependence_tester (struct data_dependence_relation *ddr,
3555 struct loop *loop_nest)
3556 {
3557
3558 if (dump_file && (dump_flags & TDF_DETAILS))
3559 fprintf (dump_file, "(subscript_dependence_tester \n");
3560
3561 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3562 dependence_stats.num_dependence_dependent++;
3563
3564 compute_subscript_distance (ddr);
3565 if (build_classic_dist_vector (ddr, loop_nest))
3566 build_classic_dir_vector (ddr);
3567
3568 if (dump_file && (dump_flags & TDF_DETAILS))
3569 fprintf (dump_file, ")\n");
3570 }
3571
3572 /* Returns true when all the access functions of A are affine or
3573 constant with respect to LOOP_NEST. */
3574
3575 static bool
3576 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3577 const struct loop *loop_nest)
3578 {
3579 unsigned int i;
3580 vec<tree> fns = DR_ACCESS_FNS (a);
3581 tree t;
3582
3583 FOR_EACH_VEC_ELT (fns, i, t)
3584 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3585 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3586 return false;
3587
3588 return true;
3589 }
3590
3591 /* Initializes an equation for an OMEGA problem using the information
3592 contained in the ACCESS_FUN. Returns true when the operation
3593 succeeded.
3594
3595 PB is the omega constraint system.
3596 EQ is the number of the equation to be initialized.
3597 OFFSET is used for shifting the variables names in the constraints:
3598 a constrain is composed of 2 * the number of variables surrounding
3599 dependence accesses. OFFSET is set either to 0 for the first n variables,
3600 then it is set to n.
3601 ACCESS_FUN is expected to be an affine chrec. */
3602
3603 static bool
3604 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3605 unsigned int offset, tree access_fun,
3606 struct data_dependence_relation *ddr)
3607 {
3608 switch (TREE_CODE (access_fun))
3609 {
3610 case POLYNOMIAL_CHREC:
3611 {
3612 tree left = CHREC_LEFT (access_fun);
3613 tree right = CHREC_RIGHT (access_fun);
3614 int var = CHREC_VARIABLE (access_fun);
3615 unsigned var_idx;
3616
3617 if (TREE_CODE (right) != INTEGER_CST)
3618 return false;
3619
3620 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3621 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3622
3623 /* Compute the innermost loop index. */
3624 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3625
3626 if (offset == 0)
3627 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3628 += int_cst_value (right);
3629
3630 switch (TREE_CODE (left))
3631 {
3632 case POLYNOMIAL_CHREC:
3633 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3634
3635 case INTEGER_CST:
3636 pb->eqs[eq].coef[0] += int_cst_value (left);
3637 return true;
3638
3639 default:
3640 return false;
3641 }
3642 }
3643
3644 case INTEGER_CST:
3645 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3646 return true;
3647
3648 default:
3649 return false;
3650 }
3651 }
3652
3653 /* As explained in the comments preceding init_omega_for_ddr, we have
3654 to set up a system for each loop level, setting outer loops
3655 variation to zero, and current loop variation to positive or zero.
3656 Save each lexico positive distance vector. */
3657
3658 static void
3659 omega_extract_distance_vectors (omega_pb pb,
3660 struct data_dependence_relation *ddr)
3661 {
3662 int eq, geq;
3663 unsigned i, j;
3664 struct loop *loopi, *loopj;
3665 enum omega_result res;
3666
3667 /* Set a new problem for each loop in the nest. The basis is the
3668 problem that we have initialized until now. On top of this we
3669 add new constraints. */
3670 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3671 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3672 {
3673 int dist = 0;
3674 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3675 DDR_NB_LOOPS (ddr));
3676
3677 omega_copy_problem (copy, pb);
3678
3679 /* For all the outer loops "loop_j", add "dj = 0". */
3680 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3681 {
3682 eq = omega_add_zero_eq (copy, omega_black);
3683 copy->eqs[eq].coef[j + 1] = 1;
3684 }
3685
3686 /* For "loop_i", add "0 <= di". */
3687 geq = omega_add_zero_geq (copy, omega_black);
3688 copy->geqs[geq].coef[i + 1] = 1;
3689
3690 /* Reduce the constraint system, and test that the current
3691 problem is feasible. */
3692 res = omega_simplify_problem (copy);
3693 if (res == omega_false
3694 || res == omega_unknown
3695 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3696 goto next_problem;
3697
3698 for (eq = 0; eq < copy->num_subs; eq++)
3699 if (copy->subs[eq].key == (int) i + 1)
3700 {
3701 dist = copy->subs[eq].coef[0];
3702 goto found_dist;
3703 }
3704
3705 if (dist == 0)
3706 {
3707 /* Reinitialize problem... */
3708 omega_copy_problem (copy, pb);
3709 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3710 {
3711 eq = omega_add_zero_eq (copy, omega_black);
3712 copy->eqs[eq].coef[j + 1] = 1;
3713 }
3714
3715 /* ..., but this time "di = 1". */
3716 eq = omega_add_zero_eq (copy, omega_black);
3717 copy->eqs[eq].coef[i + 1] = 1;
3718 copy->eqs[eq].coef[0] = -1;
3719
3720 res = omega_simplify_problem (copy);
3721 if (res == omega_false
3722 || res == omega_unknown
3723 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3724 goto next_problem;
3725
3726 for (eq = 0; eq < copy->num_subs; eq++)
3727 if (copy->subs[eq].key == (int) i + 1)
3728 {
3729 dist = copy->subs[eq].coef[0];
3730 goto found_dist;
3731 }
3732 }
3733
3734 found_dist:;
3735 /* Save the lexicographically positive distance vector. */
3736 if (dist >= 0)
3737 {
3738 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3739 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3740
3741 dist_v[i] = dist;
3742
3743 for (eq = 0; eq < copy->num_subs; eq++)
3744 if (copy->subs[eq].key > 0)
3745 {
3746 dist = copy->subs[eq].coef[0];
3747 dist_v[copy->subs[eq].key - 1] = dist;
3748 }
3749
3750 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3751 dir_v[j] = dir_from_dist (dist_v[j]);
3752
3753 save_dist_v (ddr, dist_v);
3754 save_dir_v (ddr, dir_v);
3755 }
3756
3757 next_problem:;
3758 omega_free_problem (copy);
3759 }
3760 }
3761
3762 /* This is called for each subscript of a tuple of data references:
3763 insert an equality for representing the conflicts. */
3764
3765 static bool
3766 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3767 struct data_dependence_relation *ddr,
3768 omega_pb pb, bool *maybe_dependent)
3769 {
3770 int eq;
3771 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3772 TREE_TYPE (access_fun_b));
3773 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3774 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3775 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3776 tree minus_one;
3777
3778 /* When the fun_a - fun_b is not constant, the dependence is not
3779 captured by the classic distance vector representation. */
3780 if (TREE_CODE (difference) != INTEGER_CST)
3781 return false;
3782
3783 /* ZIV test. */
3784 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3785 {
3786 /* There is no dependence. */
3787 *maybe_dependent = false;
3788 return true;
3789 }
3790
3791 minus_one = build_int_cst (type, -1);
3792 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3793
3794 eq = omega_add_zero_eq (pb, omega_black);
3795 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3796 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3797 /* There is probably a dependence, but the system of
3798 constraints cannot be built: answer "don't know". */
3799 return false;
3800
3801 /* GCD test. */
3802 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3803 && !int_divides_p (lambda_vector_gcd
3804 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3805 2 * DDR_NB_LOOPS (ddr)),
3806 pb->eqs[eq].coef[0]))
3807 {
3808 /* There is no dependence. */
3809 *maybe_dependent = false;
3810 return true;
3811 }
3812
3813 return true;
3814 }
3815
3816 /* Helper function, same as init_omega_for_ddr but specialized for
3817 data references A and B. */
3818
3819 static bool
3820 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3821 struct data_dependence_relation *ddr,
3822 omega_pb pb, bool *maybe_dependent)
3823 {
3824 unsigned i;
3825 int ineq;
3826 struct loop *loopi;
3827 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3828
3829 /* Insert an equality per subscript. */
3830 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3831 {
3832 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3833 ddr, pb, maybe_dependent))
3834 return false;
3835 else if (*maybe_dependent == false)
3836 {
3837 /* There is no dependence. */
3838 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3839 return true;
3840 }
3841 }
3842
3843 /* Insert inequalities: constraints corresponding to the iteration
3844 domain, i.e. the loops surrounding the references "loop_x" and
3845 the distance variables "dx". The layout of the OMEGA
3846 representation is as follows:
3847 - coef[0] is the constant
3848 - coef[1..nb_loops] are the protected variables that will not be
3849 removed by the solver: the "dx"
3850 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3851 */
3852 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3853 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3854 {
3855 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3856
3857 /* 0 <= loop_x */
3858 ineq = omega_add_zero_geq (pb, omega_black);
3859 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3860
3861 /* 0 <= loop_x + dx */
3862 ineq = omega_add_zero_geq (pb, omega_black);
3863 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3864 pb->geqs[ineq].coef[i + 1] = 1;
3865
3866 if (nbi != -1)
3867 {
3868 /* loop_x <= nb_iters */
3869 ineq = omega_add_zero_geq (pb, omega_black);
3870 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3871 pb->geqs[ineq].coef[0] = nbi;
3872
3873 /* loop_x + dx <= nb_iters */
3874 ineq = omega_add_zero_geq (pb, omega_black);
3875 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3876 pb->geqs[ineq].coef[i + 1] = -1;
3877 pb->geqs[ineq].coef[0] = nbi;
3878
3879 /* A step "dx" bigger than nb_iters is not feasible, so
3880 add "0 <= nb_iters + dx", */
3881 ineq = omega_add_zero_geq (pb, omega_black);
3882 pb->geqs[ineq].coef[i + 1] = 1;
3883 pb->geqs[ineq].coef[0] = nbi;
3884 /* and "dx <= nb_iters". */
3885 ineq = omega_add_zero_geq (pb, omega_black);
3886 pb->geqs[ineq].coef[i + 1] = -1;
3887 pb->geqs[ineq].coef[0] = nbi;
3888 }
3889 }
3890
3891 omega_extract_distance_vectors (pb, ddr);
3892
3893 return true;
3894 }
3895
3896 /* Sets up the Omega dependence problem for the data dependence
3897 relation DDR. Returns false when the constraint system cannot be
3898 built, ie. when the test answers "don't know". Returns true
3899 otherwise, and when independence has been proved (using one of the
3900 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3901 set MAYBE_DEPENDENT to true.
3902
3903 Example: for setting up the dependence system corresponding to the
3904 conflicting accesses
3905
3906 | loop_i
3907 | loop_j
3908 | A[i, i+1] = ...
3909 | ... A[2*j, 2*(i + j)]
3910 | endloop_j
3911 | endloop_i
3912
3913 the following constraints come from the iteration domain:
3914
3915 0 <= i <= Ni
3916 0 <= i + di <= Ni
3917 0 <= j <= Nj
3918 0 <= j + dj <= Nj
3919
3920 where di, dj are the distance variables. The constraints
3921 representing the conflicting elements are:
3922
3923 i = 2 * (j + dj)
3924 i + 1 = 2 * (i + di + j + dj)
3925
3926 For asking that the resulting distance vector (di, dj) be
3927 lexicographically positive, we insert the constraint "di >= 0". If
3928 "di = 0" in the solution, we fix that component to zero, and we
3929 look at the inner loops: we set a new problem where all the outer
3930 loop distances are zero, and fix this inner component to be
3931 positive. When one of the components is positive, we save that
3932 distance, and set a new problem where the distance on this loop is
3933 zero, searching for other distances in the inner loops. Here is
3934 the classic example that illustrates that we have to set for each
3935 inner loop a new problem:
3936
3937 | loop_1
3938 | loop_2
3939 | A[10]
3940 | endloop_2
3941 | endloop_1
3942
3943 we have to save two distances (1, 0) and (0, 1).
3944
3945 Given two array references, refA and refB, we have to set the
3946 dependence problem twice, refA vs. refB and refB vs. refA, and we
3947 cannot do a single test, as refB might occur before refA in the
3948 inner loops, and the contrary when considering outer loops: ex.
3949
3950 | loop_0
3951 | loop_1
3952 | loop_2
3953 | T[{1,+,1}_2][{1,+,1}_1] // refA
3954 | T[{2,+,1}_2][{0,+,1}_1] // refB
3955 | endloop_2
3956 | endloop_1
3957 | endloop_0
3958
3959 refB touches the elements in T before refA, and thus for the same
3960 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3961 but for successive loop_0 iterations, we have (1, -1, 1)
3962
3963 The Omega solver expects the distance variables ("di" in the
3964 previous example) to come first in the constraint system (as
3965 variables to be protected, or "safe" variables), the constraint
3966 system is built using the following layout:
3967
3968 "cst | distance vars | index vars".
3969 */
3970
3971 static bool
3972 init_omega_for_ddr (struct data_dependence_relation *ddr,
3973 bool *maybe_dependent)
3974 {
3975 omega_pb pb;
3976 bool res = false;
3977
3978 *maybe_dependent = true;
3979
3980 if (same_access_functions (ddr))
3981 {
3982 unsigned j;
3983 lambda_vector dir_v;
3984
3985 /* Save the 0 vector. */
3986 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3987 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3988 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3989 dir_v[j] = dir_equal;
3990 save_dir_v (ddr, dir_v);
3991
3992 /* Save the dependences carried by outer loops. */
3993 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3994 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3995 maybe_dependent);
3996 omega_free_problem (pb);
3997 return res;
3998 }
3999
4000 /* Omega expects the protected variables (those that have to be kept
4001 after elimination) to appear first in the constraint system.
4002 These variables are the distance variables. In the following
4003 initialization we declare NB_LOOPS safe variables, and the total
4004 number of variables for the constraint system is 2*NB_LOOPS. */
4005 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4006 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4007 maybe_dependent);
4008 omega_free_problem (pb);
4009
4010 /* Stop computation if not decidable, or no dependence. */
4011 if (res == false || *maybe_dependent == false)
4012 return res;
4013
4014 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4015 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4016 maybe_dependent);
4017 omega_free_problem (pb);
4018
4019 return res;
4020 }
4021
4022 /* Return true when DDR contains the same information as that stored
4023 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4024
4025 static bool
4026 ddr_consistent_p (FILE *file,
4027 struct data_dependence_relation *ddr,
4028 vec<lambda_vector> dist_vects,
4029 vec<lambda_vector> dir_vects)
4030 {
4031 unsigned int i, j;
4032
4033 /* If dump_file is set, output there. */
4034 if (dump_file && (dump_flags & TDF_DETAILS))
4035 file = dump_file;
4036
4037 if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr))
4038 {
4039 lambda_vector b_dist_v;
4040 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4041 dist_vects.length (),
4042 DDR_NUM_DIST_VECTS (ddr));
4043
4044 fprintf (file, "Banerjee dist vectors:\n");
4045 FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v)
4046 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4047
4048 fprintf (file, "Omega dist vectors:\n");
4049 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4050 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4051
4052 fprintf (file, "data dependence relation:\n");
4053 dump_data_dependence_relation (file, ddr);
4054
4055 fprintf (file, ")\n");
4056 return false;
4057 }
4058
4059 if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr))
4060 {
4061 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4062 dir_vects.length (),
4063 DDR_NUM_DIR_VECTS (ddr));
4064 return false;
4065 }
4066
4067 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4068 {
4069 lambda_vector a_dist_v;
4070 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4071
4072 /* Distance vectors are not ordered in the same way in the DDR
4073 and in the DIST_VECTS: search for a matching vector. */
4074 FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v)
4075 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4076 break;
4077
4078 if (j == dist_vects.length ())
4079 {
4080 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4081 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4082 fprintf (file, "not found in Omega dist vectors:\n");
4083 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4084 fprintf (file, "data dependence relation:\n");
4085 dump_data_dependence_relation (file, ddr);
4086 fprintf (file, ")\n");
4087 }
4088 }
4089
4090 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4091 {
4092 lambda_vector a_dir_v;
4093 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4094
4095 /* Direction vectors are not ordered in the same way in the DDR
4096 and in the DIR_VECTS: search for a matching vector. */
4097 FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v)
4098 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4099 break;
4100
4101 if (j == dist_vects.length ())
4102 {
4103 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4104 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4105 fprintf (file, "not found in Omega dir vectors:\n");
4106 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4107 fprintf (file, "data dependence relation:\n");
4108 dump_data_dependence_relation (file, ddr);
4109 fprintf (file, ")\n");
4110 }
4111 }
4112
4113 return true;
4114 }
4115
4116 /* This computes the affine dependence relation between A and B with
4117 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4118 independence between two accesses, while CHREC_DONT_KNOW is used
4119 for representing the unknown relation.
4120
4121 Note that it is possible to stop the computation of the dependence
4122 relation the first time we detect a CHREC_KNOWN element for a given
4123 subscript. */
4124
4125 void
4126 compute_affine_dependence (struct data_dependence_relation *ddr,
4127 struct loop *loop_nest)
4128 {
4129 struct data_reference *dra = DDR_A (ddr);
4130 struct data_reference *drb = DDR_B (ddr);
4131
4132 if (dump_file && (dump_flags & TDF_DETAILS))
4133 {
4134 fprintf (dump_file, "(compute_affine_dependence\n");
4135 fprintf (dump_file, " stmt_a: ");
4136 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4137 fprintf (dump_file, " stmt_b: ");
4138 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4139 }
4140
4141 /* Analyze only when the dependence relation is not yet known. */
4142 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4143 {
4144 dependence_stats.num_dependence_tests++;
4145
4146 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4147 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4148 {
4149 if (flag_check_data_deps)
4150 {
4151 /* Compute the dependences using the first algorithm. */
4152 subscript_dependence_tester (ddr, loop_nest);
4153
4154 if (dump_file && (dump_flags & TDF_DETAILS))
4155 {
4156 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4157 dump_data_dependence_relation (dump_file, ddr);
4158 }
4159
4160 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4161 {
4162 bool maybe_dependent;
4163 vec<lambda_vector> dir_vects, dist_vects;
4164
4165 /* Save the result of the first DD analyzer. */
4166 dist_vects = DDR_DIST_VECTS (ddr);
4167 dir_vects = DDR_DIR_VECTS (ddr);
4168
4169 /* Reset the information. */
4170 DDR_DIST_VECTS (ddr).create (0);
4171 DDR_DIR_VECTS (ddr).create (0);
4172
4173 /* Compute the same information using Omega. */
4174 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4175 goto csys_dont_know;
4176
4177 if (dump_file && (dump_flags & TDF_DETAILS))
4178 {
4179 fprintf (dump_file, "Omega Analyzer\n");
4180 dump_data_dependence_relation (dump_file, ddr);
4181 }
4182
4183 /* Check that we get the same information. */
4184 if (maybe_dependent)
4185 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4186 dir_vects));
4187 }
4188 }
4189 else
4190 subscript_dependence_tester (ddr, loop_nest);
4191 }
4192
4193 /* As a last case, if the dependence cannot be determined, or if
4194 the dependence is considered too difficult to determine, answer
4195 "don't know". */
4196 else
4197 {
4198 csys_dont_know:;
4199 dependence_stats.num_dependence_undetermined++;
4200
4201 if (dump_file && (dump_flags & TDF_DETAILS))
4202 {
4203 fprintf (dump_file, "Data ref a:\n");
4204 dump_data_reference (dump_file, dra);
4205 fprintf (dump_file, "Data ref b:\n");
4206 dump_data_reference (dump_file, drb);
4207 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4208 }
4209 finalize_ddr_dependent (ddr, chrec_dont_know);
4210 }
4211 }
4212
4213 if (dump_file && (dump_flags & TDF_DETAILS))
4214 {
4215 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4216 fprintf (dump_file, ") -> no dependence\n");
4217 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4218 fprintf (dump_file, ") -> dependence analysis failed\n");
4219 else
4220 fprintf (dump_file, ")\n");
4221 }
4222 }
4223
4224 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4225 the data references in DATAREFS, in the LOOP_NEST. When
4226 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4227 relations. Return true when successful, i.e. data references number
4228 is small enough to be handled. */
4229
4230 bool
4231 compute_all_dependences (vec<data_reference_p> datarefs,
4232 vec<ddr_p> *dependence_relations,
4233 vec<loop_p> loop_nest,
4234 bool compute_self_and_rr)
4235 {
4236 struct data_dependence_relation *ddr;
4237 struct data_reference *a, *b;
4238 unsigned int i, j;
4239
4240 if ((int) datarefs.length ()
4241 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4242 {
4243 struct data_dependence_relation *ddr;
4244
4245 /* Insert a single relation into dependence_relations:
4246 chrec_dont_know. */
4247 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4248 dependence_relations->safe_push (ddr);
4249 return false;
4250 }
4251
4252 FOR_EACH_VEC_ELT (datarefs, i, a)
4253 for (j = i + 1; datarefs.iterate (j, &b); j++)
4254 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4255 {
4256 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4257 dependence_relations->safe_push (ddr);
4258 if (loop_nest.exists ())
4259 compute_affine_dependence (ddr, loop_nest[0]);
4260 }
4261
4262 if (compute_self_and_rr)
4263 FOR_EACH_VEC_ELT (datarefs, i, a)
4264 {
4265 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4266 dependence_relations->safe_push (ddr);
4267 if (loop_nest.exists ())
4268 compute_affine_dependence (ddr, loop_nest[0]);
4269 }
4270
4271 return true;
4272 }
4273
4274 /* Describes a location of a memory reference. */
4275
4276 typedef struct data_ref_loc_d
4277 {
4278 /* Position of the memory reference. */
4279 tree *pos;
4280
4281 /* True if the memory reference is read. */
4282 bool is_read;
4283 } data_ref_loc;
4284
4285
4286 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4287 true if STMT clobbers memory, false otherwise. */
4288
4289 static bool
4290 get_references_in_stmt (gimple stmt, vec<data_ref_loc> *references)
4291 {
4292 bool clobbers_memory = false;
4293 data_ref_loc ref;
4294 tree *op0, *op1;
4295 enum gimple_code stmt_code = gimple_code (stmt);
4296
4297 references->create (0);
4298
4299 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4300 As we cannot model data-references to not spelled out
4301 accesses give up if they may occur. */
4302 if ((stmt_code == GIMPLE_CALL
4303 && !(gimple_call_flags (stmt) & ECF_CONST))
4304 || (stmt_code == GIMPLE_ASM
4305 && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt))))
4306 clobbers_memory = true;
4307
4308 if (!gimple_vuse (stmt))
4309 return clobbers_memory;
4310
4311 if (stmt_code == GIMPLE_ASSIGN)
4312 {
4313 tree base;
4314 op0 = gimple_assign_lhs_ptr (stmt);
4315 op1 = gimple_assign_rhs1_ptr (stmt);
4316
4317 if (DECL_P (*op1)
4318 || (REFERENCE_CLASS_P (*op1)
4319 && (base = get_base_address (*op1))
4320 && TREE_CODE (base) != SSA_NAME))
4321 {
4322 ref.pos = op1;
4323 ref.is_read = true;
4324 references->safe_push (ref);
4325 }
4326 }
4327 else if (stmt_code == GIMPLE_CALL)
4328 {
4329 unsigned i, n;
4330
4331 op0 = gimple_call_lhs_ptr (stmt);
4332 n = gimple_call_num_args (stmt);
4333 for (i = 0; i < n; i++)
4334 {
4335 op1 = gimple_call_arg_ptr (stmt, i);
4336
4337 if (DECL_P (*op1)
4338 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4339 {
4340 ref.pos = op1;
4341 ref.is_read = true;
4342 references->safe_push (ref);
4343 }
4344 }
4345 }
4346 else
4347 return clobbers_memory;
4348
4349 if (*op0
4350 && (DECL_P (*op0)
4351 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4352 {
4353 ref.pos = op0;
4354 ref.is_read = false;
4355 references->safe_push (ref);
4356 }
4357 return clobbers_memory;
4358 }
4359
4360 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4361 reference, returns false, otherwise returns true. NEST is the outermost
4362 loop of the loop nest in which the references should be analyzed. */
4363
4364 bool
4365 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4366 vec<data_reference_p> *datarefs)
4367 {
4368 unsigned i;
4369 vec<data_ref_loc> references;
4370 data_ref_loc *ref;
4371 bool ret = true;
4372 data_reference_p dr;
4373
4374 if (get_references_in_stmt (stmt, &references))
4375 {
4376 references.release ();
4377 return false;
4378 }
4379
4380 FOR_EACH_VEC_ELT (references, i, ref)
4381 {
4382 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4383 *ref->pos, stmt, ref->is_read);
4384 gcc_assert (dr != NULL);
4385 datarefs->safe_push (dr);
4386 }
4387 references.release ();
4388 return ret;
4389 }
4390
4391 /* Stores the data references in STMT to DATAREFS. If there is an
4392 unanalyzable reference, returns false, otherwise returns true.
4393 NEST is the outermost loop of the loop nest in which the references
4394 should be instantiated, LOOP is the loop in which the references
4395 should be analyzed. */
4396
4397 bool
4398 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4399 vec<data_reference_p> *datarefs)
4400 {
4401 unsigned i;
4402 vec<data_ref_loc> references;
4403 data_ref_loc *ref;
4404 bool ret = true;
4405 data_reference_p dr;
4406
4407 if (get_references_in_stmt (stmt, &references))
4408 {
4409 references.release ();
4410 return false;
4411 }
4412
4413 FOR_EACH_VEC_ELT (references, i, ref)
4414 {
4415 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4416 gcc_assert (dr != NULL);
4417 datarefs->safe_push (dr);
4418 }
4419
4420 references.release ();
4421 return ret;
4422 }
4423
4424 /* Search the data references in LOOP, and record the information into
4425 DATAREFS. Returns chrec_dont_know when failing to analyze a
4426 difficult case, returns NULL_TREE otherwise. */
4427
4428 tree
4429 find_data_references_in_bb (struct loop *loop, basic_block bb,
4430 vec<data_reference_p> *datarefs)
4431 {
4432 gimple_stmt_iterator bsi;
4433
4434 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4435 {
4436 gimple stmt = gsi_stmt (bsi);
4437
4438 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4439 {
4440 struct data_reference *res;
4441 res = XCNEW (struct data_reference);
4442 datarefs->safe_push (res);
4443
4444 return chrec_dont_know;
4445 }
4446 }
4447
4448 return NULL_TREE;
4449 }
4450
4451 /* Search the data references in LOOP, and record the information into
4452 DATAREFS. Returns chrec_dont_know when failing to analyze a
4453 difficult case, returns NULL_TREE otherwise.
4454
4455 TODO: This function should be made smarter so that it can handle address
4456 arithmetic as if they were array accesses, etc. */
4457
4458 static tree
4459 find_data_references_in_loop (struct loop *loop,
4460 vec<data_reference_p> *datarefs)
4461 {
4462 basic_block bb, *bbs;
4463 unsigned int i;
4464
4465 bbs = get_loop_body_in_dom_order (loop);
4466
4467 for (i = 0; i < loop->num_nodes; i++)
4468 {
4469 bb = bbs[i];
4470
4471 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4472 {
4473 free (bbs);
4474 return chrec_dont_know;
4475 }
4476 }
4477 free (bbs);
4478
4479 return NULL_TREE;
4480 }
4481
4482 /* Recursive helper function. */
4483
4484 static bool
4485 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4486 {
4487 /* Inner loops of the nest should not contain siblings. Example:
4488 when there are two consecutive loops,
4489
4490 | loop_0
4491 | loop_1
4492 | A[{0, +, 1}_1]
4493 | endloop_1
4494 | loop_2
4495 | A[{0, +, 1}_2]
4496 | endloop_2
4497 | endloop_0
4498
4499 the dependence relation cannot be captured by the distance
4500 abstraction. */
4501 if (loop->next)
4502 return false;
4503
4504 loop_nest->safe_push (loop);
4505 if (loop->inner)
4506 return find_loop_nest_1 (loop->inner, loop_nest);
4507 return true;
4508 }
4509
4510 /* Return false when the LOOP is not well nested. Otherwise return
4511 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4512 contain the loops from the outermost to the innermost, as they will
4513 appear in the classic distance vector. */
4514
4515 bool
4516 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4517 {
4518 loop_nest->safe_push (loop);
4519 if (loop->inner)
4520 return find_loop_nest_1 (loop->inner, loop_nest);
4521 return true;
4522 }
4523
4524 /* Returns true when the data dependences have been computed, false otherwise.
4525 Given a loop nest LOOP, the following vectors are returned:
4526 DATAREFS is initialized to all the array elements contained in this loop,
4527 DEPENDENCE_RELATIONS contains the relations between the data references.
4528 Compute read-read and self relations if
4529 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4530
4531 bool
4532 compute_data_dependences_for_loop (struct loop *loop,
4533 bool compute_self_and_read_read_dependences,
4534 vec<loop_p> *loop_nest,
4535 vec<data_reference_p> *datarefs,
4536 vec<ddr_p> *dependence_relations)
4537 {
4538 bool res = true;
4539
4540 memset (&dependence_stats, 0, sizeof (dependence_stats));
4541
4542 /* If the loop nest is not well formed, or one of the data references
4543 is not computable, give up without spending time to compute other
4544 dependences. */
4545 if (!loop
4546 || !find_loop_nest (loop, loop_nest)
4547 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4548 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4549 compute_self_and_read_read_dependences))
4550 res = false;
4551
4552 if (dump_file && (dump_flags & TDF_STATS))
4553 {
4554 fprintf (dump_file, "Dependence tester statistics:\n");
4555
4556 fprintf (dump_file, "Number of dependence tests: %d\n",
4557 dependence_stats.num_dependence_tests);
4558 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4559 dependence_stats.num_dependence_dependent);
4560 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4561 dependence_stats.num_dependence_independent);
4562 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4563 dependence_stats.num_dependence_undetermined);
4564
4565 fprintf (dump_file, "Number of subscript tests: %d\n",
4566 dependence_stats.num_subscript_tests);
4567 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4568 dependence_stats.num_subscript_undetermined);
4569 fprintf (dump_file, "Number of same subscript function: %d\n",
4570 dependence_stats.num_same_subscript_function);
4571
4572 fprintf (dump_file, "Number of ziv tests: %d\n",
4573 dependence_stats.num_ziv);
4574 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4575 dependence_stats.num_ziv_dependent);
4576 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4577 dependence_stats.num_ziv_independent);
4578 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4579 dependence_stats.num_ziv_unimplemented);
4580
4581 fprintf (dump_file, "Number of siv tests: %d\n",
4582 dependence_stats.num_siv);
4583 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4584 dependence_stats.num_siv_dependent);
4585 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4586 dependence_stats.num_siv_independent);
4587 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4588 dependence_stats.num_siv_unimplemented);
4589
4590 fprintf (dump_file, "Number of miv tests: %d\n",
4591 dependence_stats.num_miv);
4592 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4593 dependence_stats.num_miv_dependent);
4594 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4595 dependence_stats.num_miv_independent);
4596 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4597 dependence_stats.num_miv_unimplemented);
4598 }
4599
4600 return res;
4601 }
4602
4603 /* Returns true when the data dependences for the basic block BB have been
4604 computed, false otherwise.
4605 DATAREFS is initialized to all the array elements contained in this basic
4606 block, DEPENDENCE_RELATIONS contains the relations between the data
4607 references. Compute read-read and self relations if
4608 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4609 bool
4610 compute_data_dependences_for_bb (basic_block bb,
4611 bool compute_self_and_read_read_dependences,
4612 vec<data_reference_p> *datarefs,
4613 vec<ddr_p> *dependence_relations)
4614 {
4615 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4616 return false;
4617
4618 return compute_all_dependences (*datarefs, dependence_relations,
4619 vec<loop_p>(),
4620 compute_self_and_read_read_dependences);
4621 }
4622
4623 /* Entry point (for testing only). Analyze all the data references
4624 and the dependence relations in LOOP.
4625
4626 The data references are computed first.
4627
4628 A relation on these nodes is represented by a complete graph. Some
4629 of the relations could be of no interest, thus the relations can be
4630 computed on demand.
4631
4632 In the following function we compute all the relations. This is
4633 just a first implementation that is here for:
4634 - for showing how to ask for the dependence relations,
4635 - for the debugging the whole dependence graph,
4636 - for the dejagnu testcases and maintenance.
4637
4638 It is possible to ask only for a part of the graph, avoiding to
4639 compute the whole dependence graph. The computed dependences are
4640 stored in a knowledge base (KB) such that later queries don't
4641 recompute the same information. The implementation of this KB is
4642 transparent to the optimizer, and thus the KB can be changed with a
4643 more efficient implementation, or the KB could be disabled. */
4644 static void
4645 analyze_all_data_dependences (struct loop *loop)
4646 {
4647 unsigned int i;
4648 int nb_data_refs = 10;
4649 vec<data_reference_p> datarefs;
4650 datarefs.create (nb_data_refs);
4651 vec<ddr_p> dependence_relations;
4652 dependence_relations.create (nb_data_refs * nb_data_refs);
4653 vec<loop_p> loop_nest;
4654 loop_nest.create (3);
4655
4656 /* Compute DDs on the whole function. */
4657 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4658 &dependence_relations);
4659
4660 if (dump_file)
4661 {
4662 dump_data_dependence_relations (dump_file, dependence_relations);
4663 fprintf (dump_file, "\n\n");
4664
4665 if (dump_flags & TDF_DETAILS)
4666 dump_dist_dir_vectors (dump_file, dependence_relations);
4667
4668 if (dump_flags & TDF_STATS)
4669 {
4670 unsigned nb_top_relations = 0;
4671 unsigned nb_bot_relations = 0;
4672 unsigned nb_chrec_relations = 0;
4673 struct data_dependence_relation *ddr;
4674
4675 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4676 {
4677 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4678 nb_top_relations++;
4679
4680 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4681 nb_bot_relations++;
4682
4683 else
4684 nb_chrec_relations++;
4685 }
4686
4687 gather_stats_on_scev_database ();
4688 }
4689 }
4690
4691 loop_nest.release ();
4692 free_dependence_relations (dependence_relations);
4693 free_data_refs (datarefs);
4694 }
4695
4696 /* Computes all the data dependences and check that the results of
4697 several analyzers are the same. */
4698
4699 void
4700 tree_check_data_deps (void)
4701 {
4702 loop_iterator li;
4703 struct loop *loop_nest;
4704
4705 FOR_EACH_LOOP (li, loop_nest, 0)
4706 analyze_all_data_dependences (loop_nest);
4707 }
4708
4709 /* Free the memory used by a data dependence relation DDR. */
4710
4711 void
4712 free_dependence_relation (struct data_dependence_relation *ddr)
4713 {
4714 if (ddr == NULL)
4715 return;
4716
4717 if (DDR_SUBSCRIPTS (ddr).exists ())
4718 free_subscripts (DDR_SUBSCRIPTS (ddr));
4719 DDR_DIST_VECTS (ddr).release ();
4720 DDR_DIR_VECTS (ddr).release ();
4721
4722 free (ddr);
4723 }
4724
4725 /* Free the memory used by the data dependence relations from
4726 DEPENDENCE_RELATIONS. */
4727
4728 void
4729 free_dependence_relations (vec<ddr_p> dependence_relations)
4730 {
4731 unsigned int i;
4732 struct data_dependence_relation *ddr;
4733
4734 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4735 if (ddr)
4736 free_dependence_relation (ddr);
4737
4738 dependence_relations.release ();
4739 }
4740
4741 /* Free the memory used by the data references from DATAREFS. */
4742
4743 void
4744 free_data_refs (vec<data_reference_p> datarefs)
4745 {
4746 unsigned int i;
4747 struct data_reference *dr;
4748
4749 FOR_EACH_VEC_ELT (datarefs, i, dr)
4750 free_data_ref (dr);
4751 datarefs.release ();
4752 }
4753
4754 \f
4755
4756 /* Dump vertex I in RDG to FILE. */
4757
4758 static void
4759 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4760 {
4761 struct vertex *v = &(rdg->vertices[i]);
4762 struct graph_edge *e;
4763
4764 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4765 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4766 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4767
4768 if (v->pred)
4769 for (e = v->pred; e; e = e->pred_next)
4770 fprintf (file, " %d", e->src);
4771
4772 fprintf (file, ") (out:");
4773
4774 if (v->succ)
4775 for (e = v->succ; e; e = e->succ_next)
4776 fprintf (file, " %d", e->dest);
4777
4778 fprintf (file, ")\n");
4779 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4780 fprintf (file, ")\n");
4781 }
4782
4783 /* Call dump_rdg_vertex on stderr. */
4784
4785 DEBUG_FUNCTION void
4786 debug_rdg_vertex (struct graph *rdg, int i)
4787 {
4788 dump_rdg_vertex (stderr, rdg, i);
4789 }
4790
4791 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4792 dumped vertices to that bitmap. */
4793
4794 static void
4795 dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4796 {
4797 int i;
4798
4799 fprintf (file, "(%d\n", c);
4800
4801 for (i = 0; i < rdg->n_vertices; i++)
4802 if (rdg->vertices[i].component == c)
4803 {
4804 if (dumped)
4805 bitmap_set_bit (dumped, i);
4806
4807 dump_rdg_vertex (file, rdg, i);
4808 }
4809
4810 fprintf (file, ")\n");
4811 }
4812
4813 /* Call dump_rdg_vertex on stderr. */
4814
4815 DEBUG_FUNCTION void
4816 debug_rdg_component (struct graph *rdg, int c)
4817 {
4818 dump_rdg_component (stderr, rdg, c, NULL);
4819 }
4820
4821 /* Dump the reduced dependence graph RDG to FILE. */
4822
4823 void
4824 dump_rdg (FILE *file, struct graph *rdg)
4825 {
4826 int i;
4827 bitmap dumped = BITMAP_ALLOC (NULL);
4828
4829 fprintf (file, "(rdg\n");
4830
4831 for (i = 0; i < rdg->n_vertices; i++)
4832 if (!bitmap_bit_p (dumped, i))
4833 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4834
4835 fprintf (file, ")\n");
4836 BITMAP_FREE (dumped);
4837 }
4838
4839 /* Call dump_rdg on stderr. */
4840
4841 DEBUG_FUNCTION void
4842 debug_rdg (struct graph *rdg)
4843 {
4844 dump_rdg (stderr, rdg);
4845 }
4846
4847 static void
4848 dot_rdg_1 (FILE *file, struct graph *rdg)
4849 {
4850 int i;
4851
4852 fprintf (file, "digraph RDG {\n");
4853
4854 for (i = 0; i < rdg->n_vertices; i++)
4855 {
4856 struct vertex *v = &(rdg->vertices[i]);
4857 struct graph_edge *e;
4858
4859 /* Highlight reads from memory. */
4860 if (RDG_MEM_READS_STMT (rdg, i))
4861 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4862
4863 /* Highlight stores to memory. */
4864 if (RDG_MEM_WRITE_STMT (rdg, i))
4865 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4866
4867 if (v->succ)
4868 for (e = v->succ; e; e = e->succ_next)
4869 switch (RDGE_TYPE (e))
4870 {
4871 case input_dd:
4872 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4873 break;
4874
4875 case output_dd:
4876 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4877 break;
4878
4879 case flow_dd:
4880 /* These are the most common dependences: don't print these. */
4881 fprintf (file, "%d -> %d \n", i, e->dest);
4882 break;
4883
4884 case anti_dd:
4885 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4886 break;
4887
4888 default:
4889 gcc_unreachable ();
4890 }
4891 }
4892
4893 fprintf (file, "}\n\n");
4894 }
4895
4896 /* Display the Reduced Dependence Graph using dotty. */
4897 extern void dot_rdg (struct graph *);
4898
4899 DEBUG_FUNCTION void
4900 dot_rdg (struct graph *rdg)
4901 {
4902 /* When debugging, enable the following code. This cannot be used
4903 in production compilers because it calls "system". */
4904 #if 0
4905 FILE *file = fopen ("/tmp/rdg.dot", "w");
4906 gcc_assert (file != NULL);
4907
4908 dot_rdg_1 (file, rdg);
4909 fclose (file);
4910
4911 system ("dotty /tmp/rdg.dot &");
4912 #else
4913 dot_rdg_1 (stderr, rdg);
4914 #endif
4915 }
4916
4917 /* Returns the index of STMT in RDG. */
4918
4919 int
4920 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
4921 {
4922 int index = gimple_uid (stmt);
4923 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
4924 return index;
4925 }
4926
4927 /* Creates an edge in RDG for each distance vector from DDR. The
4928 order that we keep track of in the RDG is the order in which
4929 statements have to be executed. */
4930
4931 static void
4932 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4933 {
4934 struct graph_edge *e;
4935 int va, vb;
4936 data_reference_p dra = DDR_A (ddr);
4937 data_reference_p drb = DDR_B (ddr);
4938 unsigned level = ddr_dependence_level (ddr);
4939
4940 /* For non scalar dependences, when the dependence is REVERSED,
4941 statement B has to be executed before statement A. */
4942 if (level > 0
4943 && !DDR_REVERSED_P (ddr))
4944 {
4945 data_reference_p tmp = dra;
4946 dra = drb;
4947 drb = tmp;
4948 }
4949
4950 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4951 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4952
4953 if (va < 0 || vb < 0)
4954 return;
4955
4956 e = add_edge (rdg, va, vb);
4957 e->data = XNEW (struct rdg_edge);
4958
4959 RDGE_LEVEL (e) = level;
4960 RDGE_RELATION (e) = ddr;
4961
4962 /* Determines the type of the data dependence. */
4963 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4964 RDGE_TYPE (e) = input_dd;
4965 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4966 RDGE_TYPE (e) = output_dd;
4967 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4968 RDGE_TYPE (e) = flow_dd;
4969 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4970 RDGE_TYPE (e) = anti_dd;
4971 }
4972
4973 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4974 the index of DEF in RDG. */
4975
4976 static void
4977 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4978 {
4979 use_operand_p imm_use_p;
4980 imm_use_iterator iterator;
4981
4982 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4983 {
4984 struct graph_edge *e;
4985 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4986
4987 if (use < 0)
4988 continue;
4989
4990 e = add_edge (rdg, idef, use);
4991 e->data = XNEW (struct rdg_edge);
4992 RDGE_TYPE (e) = flow_dd;
4993 RDGE_RELATION (e) = NULL;
4994 }
4995 }
4996
4997 /* Creates the edges of the reduced dependence graph RDG. */
4998
4999 static void
5000 create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs)
5001 {
5002 int i;
5003 struct data_dependence_relation *ddr;
5004 def_operand_p def_p;
5005 ssa_op_iter iter;
5006
5007 FOR_EACH_VEC_ELT (ddrs, i, ddr)
5008 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5009 create_rdg_edge_for_ddr (rdg, ddr);
5010
5011 for (i = 0; i < rdg->n_vertices; i++)
5012 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
5013 iter, SSA_OP_DEF)
5014 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
5015 }
5016
5017 /* Build the vertices of the reduced dependence graph RDG. */
5018
5019 static void
5020 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop)
5021 {
5022 int i, j;
5023 gimple stmt;
5024
5025 FOR_EACH_VEC_ELT (stmts, i, stmt)
5026 {
5027 vec<data_ref_loc> references;
5028 data_ref_loc *ref;
5029 struct vertex *v = &(rdg->vertices[i]);
5030
5031 /* Record statement to vertex mapping. */
5032 gimple_set_uid (stmt, i);
5033
5034 v->data = XNEW (struct rdg_vertex);
5035 RDGV_STMT (v) = stmt;
5036 RDGV_DATAREFS (v).create (0);
5037 RDGV_HAS_MEM_WRITE (v) = false;
5038 RDGV_HAS_MEM_READS (v) = false;
5039 if (gimple_code (stmt) == GIMPLE_PHI)
5040 continue;
5041
5042 get_references_in_stmt (stmt, &references);
5043 FOR_EACH_VEC_ELT (references, j, ref)
5044 {
5045 data_reference_p dr;
5046 if (!ref->is_read)
5047 RDGV_HAS_MEM_WRITE (v) = true;
5048 else
5049 RDGV_HAS_MEM_READS (v) = true;
5050 dr = create_data_ref (loop, loop_containing_stmt (stmt),
5051 *ref->pos, stmt, ref->is_read);
5052 if (dr)
5053 RDGV_DATAREFS (v).safe_push (dr);
5054 }
5055 references.release ();
5056 }
5057 }
5058
5059 /* Initialize STMTS with all the statements of LOOP. When
5060 INCLUDE_PHIS is true, include also the PHI nodes. The order in
5061 which we discover statements is important as
5062 generate_loops_for_partition is using the same traversal for
5063 identifying statements. */
5064
5065 static void
5066 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
5067 {
5068 unsigned int i;
5069 basic_block *bbs = get_loop_body_in_dom_order (loop);
5070
5071 for (i = 0; i < loop->num_nodes; i++)
5072 {
5073 basic_block bb = bbs[i];
5074 gimple_stmt_iterator bsi;
5075 gimple stmt;
5076
5077 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5078 stmts->safe_push (gsi_stmt (bsi));
5079
5080 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5081 {
5082 stmt = gsi_stmt (bsi);
5083 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
5084 stmts->safe_push (stmt);
5085 }
5086 }
5087
5088 free (bbs);
5089 }
5090
5091 /* Returns true when all the dependences are computable. */
5092
5093 static bool
5094 known_dependences_p (vec<ddr_p> dependence_relations)
5095 {
5096 ddr_p ddr;
5097 unsigned int i;
5098
5099 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5100 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5101 return false;
5102
5103 return true;
5104 }
5105
5106 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5107 statement of the loop nest, and one edge per data dependence or
5108 scalar dependence. */
5109
5110 struct graph *
5111 build_empty_rdg (int n_stmts)
5112 {
5113 struct graph *rdg = new_graph (n_stmts);
5114 return rdg;
5115 }
5116
5117 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5118 statement of the loop nest, and one edge per data dependence or
5119 scalar dependence. */
5120
5121 struct graph *
5122 build_rdg (struct loop *loop,
5123 vec<loop_p> *loop_nest,
5124 vec<ddr_p> *dependence_relations,
5125 vec<data_reference_p> *datarefs)
5126 {
5127 struct graph *rdg = NULL;
5128
5129 if (compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5130 dependence_relations)
5131 && known_dependences_p (*dependence_relations))
5132 {
5133 vec<gimple> stmts;
5134 stmts.create (10);
5135 stmts_from_loop (loop, &stmts);
5136 rdg = build_empty_rdg (stmts.length ());
5137 create_rdg_vertices (rdg, stmts, loop);
5138 create_rdg_edges (rdg, *dependence_relations);
5139 stmts.release ();
5140 }
5141
5142 return rdg;
5143 }
5144
5145 /* Free the reduced dependence graph RDG. */
5146
5147 void
5148 free_rdg (struct graph *rdg)
5149 {
5150 int i;
5151
5152 for (i = 0; i < rdg->n_vertices; i++)
5153 {
5154 struct vertex *v = &(rdg->vertices[i]);
5155 struct graph_edge *e;
5156
5157 for (e = v->succ; e; e = e->succ_next)
5158 free (e->data);
5159
5160 gimple_set_uid (RDGV_STMT (v), -1);
5161 free_data_refs (RDGV_DATAREFS (v));
5162 free (v->data);
5163 }
5164
5165 free_graph (rdg);
5166 }
5167
5168 /* Determines whether the statement from vertex V of the RDG has a
5169 definition used outside the loop that contains this statement. */
5170
5171 bool
5172 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5173 {
5174 gimple stmt = RDG_STMT (rdg, v);
5175 struct loop *loop = loop_containing_stmt (stmt);
5176 use_operand_p imm_use_p;
5177 imm_use_iterator iterator;
5178 ssa_op_iter it;
5179 def_operand_p def_p;
5180
5181 if (!loop)
5182 return true;
5183
5184 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5185 {
5186 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5187 {
5188 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5189 return true;
5190 }
5191 }
5192
5193 return false;
5194 }