Pass to dr_analyze_indices the analysis loop for subscripts.
[gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
31
32 The goals of this analysis are:
33
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
37
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
40
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
46
47 - to define a knowledge base for storing the data dependence
48 information,
49
50 - to define an interface to access this data.
51
52
53 Definitions:
54
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
64
65 References:
66
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
73
74
75 */
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87
88 static struct datadep_stats
89 {
90 int num_dependence_tests;
91 int num_dependence_dependent;
92 int num_dependence_independent;
93 int num_dependence_undetermined;
94
95 int num_subscript_tests;
96 int num_subscript_undetermined;
97 int num_same_subscript_function;
98
99 int num_ziv;
100 int num_ziv_independent;
101 int num_ziv_dependent;
102 int num_ziv_unimplemented;
103
104 int num_siv;
105 int num_siv_independent;
106 int num_siv_dependent;
107 int num_siv_unimplemented;
108
109 int num_miv;
110 int num_miv_independent;
111 int num_miv_dependent;
112 int num_miv_unimplemented;
113 } dependence_stats;
114
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
116 struct data_reference *,
117 struct data_reference *,
118 struct loop *);
119 /* Returns true iff A divides B. */
120
121 static inline bool
122 tree_fold_divides_p (const_tree a, const_tree b)
123 {
124 gcc_assert (TREE_CODE (a) == INTEGER_CST);
125 gcc_assert (TREE_CODE (b) == INTEGER_CST);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
127 }
128
129 /* Returns true iff A divides B. */
130
131 static inline bool
132 int_divides_p (int a, int b)
133 {
134 return ((b % a) == 0);
135 }
136
137 \f
138
139 /* Dump into FILE all the data references from DATAREFS. */
140
141 void
142 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
143 {
144 unsigned int i;
145 struct data_reference *dr;
146
147 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
148 dump_data_reference (file, dr);
149 }
150
151 /* Dump into STDERR all the data references from DATAREFS. */
152
153 DEBUG_FUNCTION void
154 debug_data_references (VEC (data_reference_p, heap) *datarefs)
155 {
156 dump_data_references (stderr, datarefs);
157 }
158
159 /* Dump to STDERR all the dependence relations from DDRS. */
160
161 DEBUG_FUNCTION void
162 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
163 {
164 dump_data_dependence_relations (stderr, ddrs);
165 }
166
167 /* Dump into FILE all the dependence relations from DDRS. */
168
169 void
170 dump_data_dependence_relations (FILE *file,
171 VEC (ddr_p, heap) *ddrs)
172 {
173 unsigned int i;
174 struct data_dependence_relation *ddr;
175
176 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
177 dump_data_dependence_relation (file, ddr);
178 }
179
180 /* Print to STDERR the data_reference DR. */
181
182 DEBUG_FUNCTION void
183 debug_data_reference (struct data_reference *dr)
184 {
185 dump_data_reference (stderr, dr);
186 }
187
188 /* Dump function for a DATA_REFERENCE structure. */
189
190 void
191 dump_data_reference (FILE *outf,
192 struct data_reference *dr)
193 {
194 unsigned int i;
195
196 fprintf (outf, "#(Data Ref: \n");
197 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
198 fprintf (outf, "# stmt: ");
199 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
200 fprintf (outf, "# ref: ");
201 print_generic_stmt (outf, DR_REF (dr), 0);
202 fprintf (outf, "# base_object: ");
203 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
204
205 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
206 {
207 fprintf (outf, "# Access function %d: ", i);
208 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
209 }
210 fprintf (outf, "#)\n");
211 }
212
213 /* Dumps the affine function described by FN to the file OUTF. */
214
215 static void
216 dump_affine_function (FILE *outf, affine_fn fn)
217 {
218 unsigned i;
219 tree coef;
220
221 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
222 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
223 {
224 fprintf (outf, " + ");
225 print_generic_expr (outf, coef, TDF_SLIM);
226 fprintf (outf, " * x_%u", i);
227 }
228 }
229
230 /* Dumps the conflict function CF to the file OUTF. */
231
232 static void
233 dump_conflict_function (FILE *outf, conflict_function *cf)
234 {
235 unsigned i;
236
237 if (cf->n == NO_DEPENDENCE)
238 fprintf (outf, "no dependence\n");
239 else if (cf->n == NOT_KNOWN)
240 fprintf (outf, "not known\n");
241 else
242 {
243 for (i = 0; i < cf->n; i++)
244 {
245 fprintf (outf, "[");
246 dump_affine_function (outf, cf->fns[i]);
247 fprintf (outf, "]\n");
248 }
249 }
250 }
251
252 /* Dump function for a SUBSCRIPT structure. */
253
254 void
255 dump_subscript (FILE *outf, struct subscript *subscript)
256 {
257 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
258
259 fprintf (outf, "\n (subscript \n");
260 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
261 dump_conflict_function (outf, cf);
262 if (CF_NONTRIVIAL_P (cf))
263 {
264 tree last_iteration = SUB_LAST_CONFLICT (subscript);
265 fprintf (outf, " last_conflict: ");
266 print_generic_stmt (outf, last_iteration, 0);
267 }
268
269 cf = SUB_CONFLICTS_IN_B (subscript);
270 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
271 dump_conflict_function (outf, cf);
272 if (CF_NONTRIVIAL_P (cf))
273 {
274 tree last_iteration = SUB_LAST_CONFLICT (subscript);
275 fprintf (outf, " last_conflict: ");
276 print_generic_stmt (outf, last_iteration, 0);
277 }
278
279 fprintf (outf, " (Subscript distance: ");
280 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
281 fprintf (outf, " )\n");
282 fprintf (outf, " )\n");
283 }
284
285 /* Print the classic direction vector DIRV to OUTF. */
286
287 void
288 print_direction_vector (FILE *outf,
289 lambda_vector dirv,
290 int length)
291 {
292 int eq;
293
294 for (eq = 0; eq < length; eq++)
295 {
296 enum data_dependence_direction dir = ((enum data_dependence_direction)
297 dirv[eq]);
298
299 switch (dir)
300 {
301 case dir_positive:
302 fprintf (outf, " +");
303 break;
304 case dir_negative:
305 fprintf (outf, " -");
306 break;
307 case dir_equal:
308 fprintf (outf, " =");
309 break;
310 case dir_positive_or_equal:
311 fprintf (outf, " +=");
312 break;
313 case dir_positive_or_negative:
314 fprintf (outf, " +-");
315 break;
316 case dir_negative_or_equal:
317 fprintf (outf, " -=");
318 break;
319 case dir_star:
320 fprintf (outf, " *");
321 break;
322 default:
323 fprintf (outf, "indep");
324 break;
325 }
326 }
327 fprintf (outf, "\n");
328 }
329
330 /* Print a vector of direction vectors. */
331
332 void
333 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
334 int length)
335 {
336 unsigned j;
337 lambda_vector v;
338
339 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
340 print_direction_vector (outf, v, length);
341 }
342
343 /* Print a vector of distance vectors. */
344
345 void
346 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
347 int length)
348 {
349 unsigned j;
350 lambda_vector v;
351
352 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
353 print_lambda_vector (outf, v, length);
354 }
355
356 /* Debug version. */
357
358 DEBUG_FUNCTION void
359 debug_data_dependence_relation (struct data_dependence_relation *ddr)
360 {
361 dump_data_dependence_relation (stderr, ddr);
362 }
363
364 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
365
366 void
367 dump_data_dependence_relation (FILE *outf,
368 struct data_dependence_relation *ddr)
369 {
370 struct data_reference *dra, *drb;
371
372 fprintf (outf, "(Data Dep: \n");
373
374 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
375 {
376 if (ddr)
377 {
378 dra = DDR_A (ddr);
379 drb = DDR_B (ddr);
380 if (dra)
381 dump_data_reference (outf, dra);
382 else
383 fprintf (outf, " (nil)\n");
384 if (drb)
385 dump_data_reference (outf, drb);
386 else
387 fprintf (outf, " (nil)\n");
388 }
389 fprintf (outf, " (don't know)\n)\n");
390 return;
391 }
392
393 dra = DDR_A (ddr);
394 drb = DDR_B (ddr);
395 dump_data_reference (outf, dra);
396 dump_data_reference (outf, drb);
397
398 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
399 fprintf (outf, " (no dependence)\n");
400
401 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
402 {
403 unsigned int i;
404 struct loop *loopi;
405
406 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
407 {
408 fprintf (outf, " access_fn_A: ");
409 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
410 fprintf (outf, " access_fn_B: ");
411 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
412 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
413 }
414
415 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
416 fprintf (outf, " loop nest: (");
417 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
418 fprintf (outf, "%d ", loopi->num);
419 fprintf (outf, ")\n");
420
421 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
422 {
423 fprintf (outf, " distance_vector: ");
424 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
425 DDR_NB_LOOPS (ddr));
426 }
427
428 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
429 {
430 fprintf (outf, " direction_vector: ");
431 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
432 DDR_NB_LOOPS (ddr));
433 }
434 }
435
436 fprintf (outf, ")\n");
437 }
438
439 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
440
441 void
442 dump_data_dependence_direction (FILE *file,
443 enum data_dependence_direction dir)
444 {
445 switch (dir)
446 {
447 case dir_positive:
448 fprintf (file, "+");
449 break;
450
451 case dir_negative:
452 fprintf (file, "-");
453 break;
454
455 case dir_equal:
456 fprintf (file, "=");
457 break;
458
459 case dir_positive_or_negative:
460 fprintf (file, "+-");
461 break;
462
463 case dir_positive_or_equal:
464 fprintf (file, "+=");
465 break;
466
467 case dir_negative_or_equal:
468 fprintf (file, "-=");
469 break;
470
471 case dir_star:
472 fprintf (file, "*");
473 break;
474
475 default:
476 break;
477 }
478 }
479
480 /* Dumps the distance and direction vectors in FILE. DDRS contains
481 the dependence relations, and VECT_SIZE is the size of the
482 dependence vectors, or in other words the number of loops in the
483 considered nest. */
484
485 void
486 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
487 {
488 unsigned int i, j;
489 struct data_dependence_relation *ddr;
490 lambda_vector v;
491
492 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
493 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
494 {
495 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
496 {
497 fprintf (file, "DISTANCE_V (");
498 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
499 fprintf (file, ")\n");
500 }
501
502 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
503 {
504 fprintf (file, "DIRECTION_V (");
505 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
506 fprintf (file, ")\n");
507 }
508 }
509
510 fprintf (file, "\n\n");
511 }
512
513 /* Dumps the data dependence relations DDRS in FILE. */
514
515 void
516 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
517 {
518 unsigned int i;
519 struct data_dependence_relation *ddr;
520
521 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
522 dump_data_dependence_relation (file, ddr);
523
524 fprintf (file, "\n\n");
525 }
526
527 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
528 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
529 constant of type ssizetype, and returns true. If we cannot do this
530 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
531 is returned. */
532
533 static bool
534 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
535 tree *var, tree *off)
536 {
537 tree var0, var1;
538 tree off0, off1;
539 enum tree_code ocode = code;
540
541 *var = NULL_TREE;
542 *off = NULL_TREE;
543
544 switch (code)
545 {
546 case INTEGER_CST:
547 *var = build_int_cst (type, 0);
548 *off = fold_convert (ssizetype, op0);
549 return true;
550
551 case POINTER_PLUS_EXPR:
552 ocode = PLUS_EXPR;
553 /* FALLTHROUGH */
554 case PLUS_EXPR:
555 case MINUS_EXPR:
556 split_constant_offset (op0, &var0, &off0);
557 split_constant_offset (op1, &var1, &off1);
558 *var = fold_build2 (code, type, var0, var1);
559 *off = size_binop (ocode, off0, off1);
560 return true;
561
562 case MULT_EXPR:
563 if (TREE_CODE (op1) != INTEGER_CST)
564 return false;
565
566 split_constant_offset (op0, &var0, &off0);
567 *var = fold_build2 (MULT_EXPR, type, var0, op1);
568 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
569 return true;
570
571 case ADDR_EXPR:
572 {
573 tree base, poffset;
574 HOST_WIDE_INT pbitsize, pbitpos;
575 enum machine_mode pmode;
576 int punsignedp, pvolatilep;
577
578 op0 = TREE_OPERAND (op0, 0);
579 if (!handled_component_p (op0))
580 return false;
581
582 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
583 &pmode, &punsignedp, &pvolatilep, false);
584
585 if (pbitpos % BITS_PER_UNIT != 0)
586 return false;
587 base = build_fold_addr_expr (base);
588 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
589
590 if (poffset)
591 {
592 split_constant_offset (poffset, &poffset, &off1);
593 off0 = size_binop (PLUS_EXPR, off0, off1);
594 if (POINTER_TYPE_P (TREE_TYPE (base)))
595 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
596 base, fold_convert (sizetype, poffset));
597 else
598 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
599 fold_convert (TREE_TYPE (base), poffset));
600 }
601
602 var0 = fold_convert (type, base);
603
604 /* If variable length types are involved, punt, otherwise casts
605 might be converted into ARRAY_REFs in gimplify_conversion.
606 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
607 possibly no longer appears in current GIMPLE, might resurface.
608 This perhaps could run
609 if (CONVERT_EXPR_P (var0))
610 {
611 gimplify_conversion (&var0);
612 // Attempt to fill in any within var0 found ARRAY_REF's
613 // element size from corresponding op embedded ARRAY_REF,
614 // if unsuccessful, just punt.
615 } */
616 while (POINTER_TYPE_P (type))
617 type = TREE_TYPE (type);
618 if (int_size_in_bytes (type) < 0)
619 return false;
620
621 *var = var0;
622 *off = off0;
623 return true;
624 }
625
626 case SSA_NAME:
627 {
628 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
629 enum tree_code subcode;
630
631 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
632 return false;
633
634 var0 = gimple_assign_rhs1 (def_stmt);
635 subcode = gimple_assign_rhs_code (def_stmt);
636 var1 = gimple_assign_rhs2 (def_stmt);
637
638 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
639 }
640 CASE_CONVERT:
641 {
642 /* We must not introduce undefined overflow, and we must not change the value.
643 Hence we're okay if the inner type doesn't overflow to start with
644 (pointer or signed), the outer type also is an integer or pointer
645 and the outer precision is at least as large as the inner. */
646 tree itype = TREE_TYPE (op0);
647 if ((POINTER_TYPE_P (itype)
648 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
649 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
650 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
651 {
652 split_constant_offset (op0, &var0, off);
653 *var = fold_convert (type, var0);
654 return true;
655 }
656 return false;
657 }
658
659 default:
660 return false;
661 }
662 }
663
664 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
665 will be ssizetype. */
666
667 void
668 split_constant_offset (tree exp, tree *var, tree *off)
669 {
670 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
671 enum tree_code code;
672
673 *var = exp;
674 *off = ssize_int (0);
675 STRIP_NOPS (exp);
676
677 if (automatically_generated_chrec_p (exp))
678 return;
679
680 otype = TREE_TYPE (exp);
681 code = TREE_CODE (exp);
682 extract_ops_from_tree (exp, &code, &op0, &op1);
683 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
684 {
685 *var = fold_convert (type, e);
686 *off = o;
687 }
688 }
689
690 /* Returns the address ADDR of an object in a canonical shape (without nop
691 casts, and with type of pointer to the object). */
692
693 static tree
694 canonicalize_base_object_address (tree addr)
695 {
696 tree orig = addr;
697
698 STRIP_NOPS (addr);
699
700 /* The base address may be obtained by casting from integer, in that case
701 keep the cast. */
702 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
703 return orig;
704
705 if (TREE_CODE (addr) != ADDR_EXPR)
706 return addr;
707
708 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
709 }
710
711 /* Analyzes the behavior of the memory reference DR in the innermost loop or
712 basic block that contains it. Returns true if analysis succeed or false
713 otherwise. */
714
715 bool
716 dr_analyze_innermost (struct data_reference *dr)
717 {
718 gimple stmt = DR_STMT (dr);
719 struct loop *loop = loop_containing_stmt (stmt);
720 tree ref = DR_REF (dr);
721 HOST_WIDE_INT pbitsize, pbitpos;
722 tree base, poffset;
723 enum machine_mode pmode;
724 int punsignedp, pvolatilep;
725 affine_iv base_iv, offset_iv;
726 tree init, dinit, step;
727 bool in_loop = (loop && loop->num);
728
729 if (dump_file && (dump_flags & TDF_DETAILS))
730 fprintf (dump_file, "analyze_innermost: ");
731
732 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
733 &pmode, &punsignedp, &pvolatilep, false);
734 gcc_assert (base != NULL_TREE);
735
736 if (pbitpos % BITS_PER_UNIT != 0)
737 {
738 if (dump_file && (dump_flags & TDF_DETAILS))
739 fprintf (dump_file, "failed: bit offset alignment.\n");
740 return false;
741 }
742
743 if (TREE_CODE (base) == MEM_REF)
744 {
745 if (!integer_zerop (TREE_OPERAND (base, 1)))
746 {
747 if (!poffset)
748 {
749 double_int moff = mem_ref_offset (base);
750 poffset = double_int_to_tree (sizetype, moff);
751 }
752 else
753 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
754 }
755 base = TREE_OPERAND (base, 0);
756 }
757 else
758 base = build_fold_addr_expr (base);
759 if (in_loop)
760 {
761 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
762 false))
763 {
764 if (dump_file && (dump_flags & TDF_DETAILS))
765 fprintf (dump_file, "failed: evolution of base is not affine.\n");
766 return false;
767 }
768 }
769 else
770 {
771 base_iv.base = base;
772 base_iv.step = ssize_int (0);
773 base_iv.no_overflow = true;
774 }
775
776 if (!poffset)
777 {
778 offset_iv.base = ssize_int (0);
779 offset_iv.step = ssize_int (0);
780 }
781 else
782 {
783 if (!in_loop)
784 {
785 offset_iv.base = poffset;
786 offset_iv.step = ssize_int (0);
787 }
788 else if (!simple_iv (loop, loop_containing_stmt (stmt),
789 poffset, &offset_iv, false))
790 {
791 if (dump_file && (dump_flags & TDF_DETAILS))
792 fprintf (dump_file, "failed: evolution of offset is not"
793 " affine.\n");
794 return false;
795 }
796 }
797
798 init = ssize_int (pbitpos / BITS_PER_UNIT);
799 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
800 init = size_binop (PLUS_EXPR, init, dinit);
801 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
802 init = size_binop (PLUS_EXPR, init, dinit);
803
804 step = size_binop (PLUS_EXPR,
805 fold_convert (ssizetype, base_iv.step),
806 fold_convert (ssizetype, offset_iv.step));
807
808 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
809
810 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
811 DR_INIT (dr) = init;
812 DR_STEP (dr) = step;
813
814 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
815
816 if (dump_file && (dump_flags & TDF_DETAILS))
817 fprintf (dump_file, "success.\n");
818
819 return true;
820 }
821
822 /* Determines the base object and the list of indices of memory reference
823 DR, analyzed in LOOP and instantiated in loop nest NEST. */
824
825 static void
826 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
827 {
828 VEC (tree, heap) *access_fns = NULL;
829 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
830 tree base, off, access_fn = NULL_TREE;
831 basic_block before_loop = NULL;
832
833 if (nest)
834 before_loop = block_before_loop (nest);
835
836 while (handled_component_p (aref))
837 {
838 if (TREE_CODE (aref) == ARRAY_REF)
839 {
840 op = TREE_OPERAND (aref, 1);
841 if (nest)
842 {
843 access_fn = analyze_scalar_evolution (loop, op);
844 access_fn = instantiate_scev (before_loop, loop, access_fn);
845 VEC_safe_push (tree, heap, access_fns, access_fn);
846 }
847
848 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
849 }
850
851 aref = TREE_OPERAND (aref, 0);
852 }
853
854 if (nest
855 && (INDIRECT_REF_P (aref)
856 || TREE_CODE (aref) == MEM_REF))
857 {
858 op = TREE_OPERAND (aref, 0);
859 access_fn = analyze_scalar_evolution (loop, op);
860 access_fn = instantiate_scev (before_loop, loop, access_fn);
861 base = initial_condition (access_fn);
862 split_constant_offset (base, &base, &off);
863 if (TREE_CODE (aref) == MEM_REF)
864 off = size_binop (PLUS_EXPR, off,
865 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
866 access_fn = chrec_replace_initial_condition (access_fn,
867 fold_convert (TREE_TYPE (base), off));
868
869 TREE_OPERAND (aref, 0) = base;
870 VEC_safe_push (tree, heap, access_fns, access_fn);
871 }
872
873 if (TREE_CODE (aref) == MEM_REF)
874 TREE_OPERAND (aref, 1)
875 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
876
877 if (TREE_CODE (ref) == MEM_REF
878 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
879 && integer_zerop (TREE_OPERAND (ref, 1)))
880 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
881
882 /* For canonicalization purposes we'd like to strip all outermost
883 zero-offset component-refs.
884 ??? For now simply handle zero-index array-refs. */
885 while (TREE_CODE (ref) == ARRAY_REF
886 && integer_zerop (TREE_OPERAND (ref, 1)))
887 ref = TREE_OPERAND (ref, 0);
888
889 DR_BASE_OBJECT (dr) = ref;
890 DR_ACCESS_FNS (dr) = access_fns;
891 }
892
893 /* Extracts the alias analysis information from the memory reference DR. */
894
895 static void
896 dr_analyze_alias (struct data_reference *dr)
897 {
898 tree ref = DR_REF (dr);
899 tree base = get_base_address (ref), addr;
900
901 if (INDIRECT_REF_P (base)
902 || TREE_CODE (base) == MEM_REF)
903 {
904 addr = TREE_OPERAND (base, 0);
905 if (TREE_CODE (addr) == SSA_NAME)
906 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
907 }
908 }
909
910 /* Returns true if the address of DR is invariant. */
911
912 static bool
913 dr_address_invariant_p (struct data_reference *dr)
914 {
915 unsigned i;
916 tree idx;
917
918 FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx)
919 if (tree_contains_chrecs (idx, NULL))
920 return false;
921
922 return true;
923 }
924
925 /* Frees data reference DR. */
926
927 void
928 free_data_ref (data_reference_p dr)
929 {
930 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
931 free (dr);
932 }
933
934 /* Analyzes memory reference MEMREF accessed in STMT. The reference
935 is read if IS_READ is true, write otherwise. Returns the
936 data_reference description of MEMREF. NEST is the outermost loop
937 in which the reference should be instantiated, LOOP is the loop in
938 which the data reference should be analyzed. */
939
940 struct data_reference *
941 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
942 bool is_read)
943 {
944 struct data_reference *dr;
945
946 if (dump_file && (dump_flags & TDF_DETAILS))
947 {
948 fprintf (dump_file, "Creating dr for ");
949 print_generic_expr (dump_file, memref, TDF_SLIM);
950 fprintf (dump_file, "\n");
951 }
952
953 dr = XCNEW (struct data_reference);
954 DR_STMT (dr) = stmt;
955 DR_REF (dr) = memref;
956 DR_IS_READ (dr) = is_read;
957
958 dr_analyze_innermost (dr);
959 dr_analyze_indices (dr, nest, loop);
960 dr_analyze_alias (dr);
961
962 if (dump_file && (dump_flags & TDF_DETAILS))
963 {
964 fprintf (dump_file, "\tbase_address: ");
965 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
966 fprintf (dump_file, "\n\toffset from base address: ");
967 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
968 fprintf (dump_file, "\n\tconstant offset from base address: ");
969 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
970 fprintf (dump_file, "\n\tstep: ");
971 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
972 fprintf (dump_file, "\n\taligned to: ");
973 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
974 fprintf (dump_file, "\n\tbase_object: ");
975 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
976 fprintf (dump_file, "\n");
977 }
978
979 return dr;
980 }
981
982 /* Returns true if FNA == FNB. */
983
984 static bool
985 affine_function_equal_p (affine_fn fna, affine_fn fnb)
986 {
987 unsigned i, n = VEC_length (tree, fna);
988
989 if (n != VEC_length (tree, fnb))
990 return false;
991
992 for (i = 0; i < n; i++)
993 if (!operand_equal_p (VEC_index (tree, fna, i),
994 VEC_index (tree, fnb, i), 0))
995 return false;
996
997 return true;
998 }
999
1000 /* If all the functions in CF are the same, returns one of them,
1001 otherwise returns NULL. */
1002
1003 static affine_fn
1004 common_affine_function (conflict_function *cf)
1005 {
1006 unsigned i;
1007 affine_fn comm;
1008
1009 if (!CF_NONTRIVIAL_P (cf))
1010 return NULL;
1011
1012 comm = cf->fns[0];
1013
1014 for (i = 1; i < cf->n; i++)
1015 if (!affine_function_equal_p (comm, cf->fns[i]))
1016 return NULL;
1017
1018 return comm;
1019 }
1020
1021 /* Returns the base of the affine function FN. */
1022
1023 static tree
1024 affine_function_base (affine_fn fn)
1025 {
1026 return VEC_index (tree, fn, 0);
1027 }
1028
1029 /* Returns true if FN is a constant. */
1030
1031 static bool
1032 affine_function_constant_p (affine_fn fn)
1033 {
1034 unsigned i;
1035 tree coef;
1036
1037 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1038 if (!integer_zerop (coef))
1039 return false;
1040
1041 return true;
1042 }
1043
1044 /* Returns true if FN is the zero constant function. */
1045
1046 static bool
1047 affine_function_zero_p (affine_fn fn)
1048 {
1049 return (integer_zerop (affine_function_base (fn))
1050 && affine_function_constant_p (fn));
1051 }
1052
1053 /* Returns a signed integer type with the largest precision from TA
1054 and TB. */
1055
1056 static tree
1057 signed_type_for_types (tree ta, tree tb)
1058 {
1059 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1060 return signed_type_for (ta);
1061 else
1062 return signed_type_for (tb);
1063 }
1064
1065 /* Applies operation OP on affine functions FNA and FNB, and returns the
1066 result. */
1067
1068 static affine_fn
1069 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1070 {
1071 unsigned i, n, m;
1072 affine_fn ret;
1073 tree coef;
1074
1075 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1076 {
1077 n = VEC_length (tree, fna);
1078 m = VEC_length (tree, fnb);
1079 }
1080 else
1081 {
1082 n = VEC_length (tree, fnb);
1083 m = VEC_length (tree, fna);
1084 }
1085
1086 ret = VEC_alloc (tree, heap, m);
1087 for (i = 0; i < n; i++)
1088 {
1089 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1090 TREE_TYPE (VEC_index (tree, fnb, i)));
1091
1092 VEC_quick_push (tree, ret,
1093 fold_build2 (op, type,
1094 VEC_index (tree, fna, i),
1095 VEC_index (tree, fnb, i)));
1096 }
1097
1098 for (; VEC_iterate (tree, fna, i, coef); i++)
1099 VEC_quick_push (tree, ret,
1100 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1101 coef, integer_zero_node));
1102 for (; VEC_iterate (tree, fnb, i, coef); i++)
1103 VEC_quick_push (tree, ret,
1104 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1105 integer_zero_node, coef));
1106
1107 return ret;
1108 }
1109
1110 /* Returns the sum of affine functions FNA and FNB. */
1111
1112 static affine_fn
1113 affine_fn_plus (affine_fn fna, affine_fn fnb)
1114 {
1115 return affine_fn_op (PLUS_EXPR, fna, fnb);
1116 }
1117
1118 /* Returns the difference of affine functions FNA and FNB. */
1119
1120 static affine_fn
1121 affine_fn_minus (affine_fn fna, affine_fn fnb)
1122 {
1123 return affine_fn_op (MINUS_EXPR, fna, fnb);
1124 }
1125
1126 /* Frees affine function FN. */
1127
1128 static void
1129 affine_fn_free (affine_fn fn)
1130 {
1131 VEC_free (tree, heap, fn);
1132 }
1133
1134 /* Determine for each subscript in the data dependence relation DDR
1135 the distance. */
1136
1137 static void
1138 compute_subscript_distance (struct data_dependence_relation *ddr)
1139 {
1140 conflict_function *cf_a, *cf_b;
1141 affine_fn fn_a, fn_b, diff;
1142
1143 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1144 {
1145 unsigned int i;
1146
1147 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1148 {
1149 struct subscript *subscript;
1150
1151 subscript = DDR_SUBSCRIPT (ddr, i);
1152 cf_a = SUB_CONFLICTS_IN_A (subscript);
1153 cf_b = SUB_CONFLICTS_IN_B (subscript);
1154
1155 fn_a = common_affine_function (cf_a);
1156 fn_b = common_affine_function (cf_b);
1157 if (!fn_a || !fn_b)
1158 {
1159 SUB_DISTANCE (subscript) = chrec_dont_know;
1160 return;
1161 }
1162 diff = affine_fn_minus (fn_a, fn_b);
1163
1164 if (affine_function_constant_p (diff))
1165 SUB_DISTANCE (subscript) = affine_function_base (diff);
1166 else
1167 SUB_DISTANCE (subscript) = chrec_dont_know;
1168
1169 affine_fn_free (diff);
1170 }
1171 }
1172 }
1173
1174 /* Returns the conflict function for "unknown". */
1175
1176 static conflict_function *
1177 conflict_fn_not_known (void)
1178 {
1179 conflict_function *fn = XCNEW (conflict_function);
1180 fn->n = NOT_KNOWN;
1181
1182 return fn;
1183 }
1184
1185 /* Returns the conflict function for "independent". */
1186
1187 static conflict_function *
1188 conflict_fn_no_dependence (void)
1189 {
1190 conflict_function *fn = XCNEW (conflict_function);
1191 fn->n = NO_DEPENDENCE;
1192
1193 return fn;
1194 }
1195
1196 /* Returns true if the address of OBJ is invariant in LOOP. */
1197
1198 static bool
1199 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1200 {
1201 while (handled_component_p (obj))
1202 {
1203 if (TREE_CODE (obj) == ARRAY_REF)
1204 {
1205 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1206 need to check the stride and the lower bound of the reference. */
1207 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1208 loop->num)
1209 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1210 loop->num))
1211 return false;
1212 }
1213 else if (TREE_CODE (obj) == COMPONENT_REF)
1214 {
1215 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1216 loop->num))
1217 return false;
1218 }
1219 obj = TREE_OPERAND (obj, 0);
1220 }
1221
1222 if (!INDIRECT_REF_P (obj)
1223 && TREE_CODE (obj) != MEM_REF)
1224 return true;
1225
1226 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1227 loop->num);
1228 }
1229
1230 /* Returns false if we can prove that data references A and B do not alias,
1231 true otherwise. */
1232
1233 bool
1234 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1235 {
1236 tree addr_a = DR_BASE_OBJECT (a);
1237 tree addr_b = DR_BASE_OBJECT (b);
1238
1239 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1240 return refs_output_dependent_p (addr_a, addr_b);
1241 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1242 return refs_anti_dependent_p (addr_a, addr_b);
1243 return refs_may_alias_p (addr_a, addr_b);
1244 }
1245
1246 static void compute_self_dependence (struct data_dependence_relation *);
1247
1248 /* Initialize a data dependence relation between data accesses A and
1249 B. NB_LOOPS is the number of loops surrounding the references: the
1250 size of the classic distance/direction vectors. */
1251
1252 static struct data_dependence_relation *
1253 initialize_data_dependence_relation (struct data_reference *a,
1254 struct data_reference *b,
1255 VEC (loop_p, heap) *loop_nest)
1256 {
1257 struct data_dependence_relation *res;
1258 unsigned int i;
1259
1260 res = XNEW (struct data_dependence_relation);
1261 DDR_A (res) = a;
1262 DDR_B (res) = b;
1263 DDR_LOOP_NEST (res) = NULL;
1264 DDR_REVERSED_P (res) = false;
1265 DDR_SUBSCRIPTS (res) = NULL;
1266 DDR_DIR_VECTS (res) = NULL;
1267 DDR_DIST_VECTS (res) = NULL;
1268
1269 if (a == NULL || b == NULL)
1270 {
1271 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1272 return res;
1273 }
1274
1275 /* If the data references do not alias, then they are independent. */
1276 if (!dr_may_alias_p (a, b))
1277 {
1278 DDR_ARE_DEPENDENT (res) = chrec_known;
1279 return res;
1280 }
1281
1282 /* When the references are exactly the same, don't spend time doing
1283 the data dependence tests, just initialize the ddr and return. */
1284 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1285 {
1286 DDR_AFFINE_P (res) = true;
1287 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1288 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1289 DDR_LOOP_NEST (res) = loop_nest;
1290 DDR_INNER_LOOP (res) = 0;
1291 DDR_SELF_REFERENCE (res) = true;
1292 compute_self_dependence (res);
1293 return res;
1294 }
1295
1296 /* If the references do not access the same object, we do not know
1297 whether they alias or not. */
1298 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1299 {
1300 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1301 return res;
1302 }
1303
1304 /* If the base of the object is not invariant in the loop nest, we cannot
1305 analyze it. TODO -- in fact, it would suffice to record that there may
1306 be arbitrary dependences in the loops where the base object varies. */
1307 if (loop_nest
1308 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1309 DR_BASE_OBJECT (a)))
1310 {
1311 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1312 return res;
1313 }
1314
1315 /* If the number of dimensions of the access to not agree we can have
1316 a pointer access to a component of the array element type and an
1317 array access while the base-objects are still the same. Punt. */
1318 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1319 {
1320 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1321 return res;
1322 }
1323
1324 DDR_AFFINE_P (res) = true;
1325 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1326 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1327 DDR_LOOP_NEST (res) = loop_nest;
1328 DDR_INNER_LOOP (res) = 0;
1329 DDR_SELF_REFERENCE (res) = false;
1330
1331 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1332 {
1333 struct subscript *subscript;
1334
1335 subscript = XNEW (struct subscript);
1336 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1337 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1338 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1339 SUB_DISTANCE (subscript) = chrec_dont_know;
1340 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1341 }
1342
1343 return res;
1344 }
1345
1346 /* Frees memory used by the conflict function F. */
1347
1348 static void
1349 free_conflict_function (conflict_function *f)
1350 {
1351 unsigned i;
1352
1353 if (CF_NONTRIVIAL_P (f))
1354 {
1355 for (i = 0; i < f->n; i++)
1356 affine_fn_free (f->fns[i]);
1357 }
1358 free (f);
1359 }
1360
1361 /* Frees memory used by SUBSCRIPTS. */
1362
1363 static void
1364 free_subscripts (VEC (subscript_p, heap) *subscripts)
1365 {
1366 unsigned i;
1367 subscript_p s;
1368
1369 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1370 {
1371 free_conflict_function (s->conflicting_iterations_in_a);
1372 free_conflict_function (s->conflicting_iterations_in_b);
1373 free (s);
1374 }
1375 VEC_free (subscript_p, heap, subscripts);
1376 }
1377
1378 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1379 description. */
1380
1381 static inline void
1382 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1383 tree chrec)
1384 {
1385 if (dump_file && (dump_flags & TDF_DETAILS))
1386 {
1387 fprintf (dump_file, "(dependence classified: ");
1388 print_generic_expr (dump_file, chrec, 0);
1389 fprintf (dump_file, ")\n");
1390 }
1391
1392 DDR_ARE_DEPENDENT (ddr) = chrec;
1393 free_subscripts (DDR_SUBSCRIPTS (ddr));
1394 DDR_SUBSCRIPTS (ddr) = NULL;
1395 }
1396
1397 /* The dependence relation DDR cannot be represented by a distance
1398 vector. */
1399
1400 static inline void
1401 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1402 {
1403 if (dump_file && (dump_flags & TDF_DETAILS))
1404 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1405
1406 DDR_AFFINE_P (ddr) = false;
1407 }
1408
1409 \f
1410
1411 /* This section contains the classic Banerjee tests. */
1412
1413 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1414 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1415
1416 static inline bool
1417 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1418 {
1419 return (evolution_function_is_constant_p (chrec_a)
1420 && evolution_function_is_constant_p (chrec_b));
1421 }
1422
1423 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1424 variable, i.e., if the SIV (Single Index Variable) test is true. */
1425
1426 static bool
1427 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1428 {
1429 if ((evolution_function_is_constant_p (chrec_a)
1430 && evolution_function_is_univariate_p (chrec_b))
1431 || (evolution_function_is_constant_p (chrec_b)
1432 && evolution_function_is_univariate_p (chrec_a)))
1433 return true;
1434
1435 if (evolution_function_is_univariate_p (chrec_a)
1436 && evolution_function_is_univariate_p (chrec_b))
1437 {
1438 switch (TREE_CODE (chrec_a))
1439 {
1440 case POLYNOMIAL_CHREC:
1441 switch (TREE_CODE (chrec_b))
1442 {
1443 case POLYNOMIAL_CHREC:
1444 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1445 return false;
1446
1447 default:
1448 return true;
1449 }
1450
1451 default:
1452 return true;
1453 }
1454 }
1455
1456 return false;
1457 }
1458
1459 /* Creates a conflict function with N dimensions. The affine functions
1460 in each dimension follow. */
1461
1462 static conflict_function *
1463 conflict_fn (unsigned n, ...)
1464 {
1465 unsigned i;
1466 conflict_function *ret = XCNEW (conflict_function);
1467 va_list ap;
1468
1469 gcc_assert (0 < n && n <= MAX_DIM);
1470 va_start(ap, n);
1471
1472 ret->n = n;
1473 for (i = 0; i < n; i++)
1474 ret->fns[i] = va_arg (ap, affine_fn);
1475 va_end(ap);
1476
1477 return ret;
1478 }
1479
1480 /* Returns constant affine function with value CST. */
1481
1482 static affine_fn
1483 affine_fn_cst (tree cst)
1484 {
1485 affine_fn fn = VEC_alloc (tree, heap, 1);
1486 VEC_quick_push (tree, fn, cst);
1487 return fn;
1488 }
1489
1490 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1491
1492 static affine_fn
1493 affine_fn_univar (tree cst, unsigned dim, tree coef)
1494 {
1495 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1496 unsigned i;
1497
1498 gcc_assert (dim > 0);
1499 VEC_quick_push (tree, fn, cst);
1500 for (i = 1; i < dim; i++)
1501 VEC_quick_push (tree, fn, integer_zero_node);
1502 VEC_quick_push (tree, fn, coef);
1503 return fn;
1504 }
1505
1506 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1507 *OVERLAPS_B are initialized to the functions that describe the
1508 relation between the elements accessed twice by CHREC_A and
1509 CHREC_B. For k >= 0, the following property is verified:
1510
1511 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1512
1513 static void
1514 analyze_ziv_subscript (tree chrec_a,
1515 tree chrec_b,
1516 conflict_function **overlaps_a,
1517 conflict_function **overlaps_b,
1518 tree *last_conflicts)
1519 {
1520 tree type, difference;
1521 dependence_stats.num_ziv++;
1522
1523 if (dump_file && (dump_flags & TDF_DETAILS))
1524 fprintf (dump_file, "(analyze_ziv_subscript \n");
1525
1526 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1527 chrec_a = chrec_convert (type, chrec_a, NULL);
1528 chrec_b = chrec_convert (type, chrec_b, NULL);
1529 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1530
1531 switch (TREE_CODE (difference))
1532 {
1533 case INTEGER_CST:
1534 if (integer_zerop (difference))
1535 {
1536 /* The difference is equal to zero: the accessed index
1537 overlaps for each iteration in the loop. */
1538 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1539 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1540 *last_conflicts = chrec_dont_know;
1541 dependence_stats.num_ziv_dependent++;
1542 }
1543 else
1544 {
1545 /* The accesses do not overlap. */
1546 *overlaps_a = conflict_fn_no_dependence ();
1547 *overlaps_b = conflict_fn_no_dependence ();
1548 *last_conflicts = integer_zero_node;
1549 dependence_stats.num_ziv_independent++;
1550 }
1551 break;
1552
1553 default:
1554 /* We're not sure whether the indexes overlap. For the moment,
1555 conservatively answer "don't know". */
1556 if (dump_file && (dump_flags & TDF_DETAILS))
1557 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1558
1559 *overlaps_a = conflict_fn_not_known ();
1560 *overlaps_b = conflict_fn_not_known ();
1561 *last_conflicts = chrec_dont_know;
1562 dependence_stats.num_ziv_unimplemented++;
1563 break;
1564 }
1565
1566 if (dump_file && (dump_flags & TDF_DETAILS))
1567 fprintf (dump_file, ")\n");
1568 }
1569
1570 /* Sets NIT to the estimated number of executions of the statements in
1571 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1572 large as the number of iterations. If we have no reliable estimate,
1573 the function returns false, otherwise returns true. */
1574
1575 bool
1576 estimated_loop_iterations (struct loop *loop, bool conservative,
1577 double_int *nit)
1578 {
1579 estimate_numbers_of_iterations_loop (loop, true);
1580 if (conservative)
1581 {
1582 if (!loop->any_upper_bound)
1583 return false;
1584
1585 *nit = loop->nb_iterations_upper_bound;
1586 }
1587 else
1588 {
1589 if (!loop->any_estimate)
1590 return false;
1591
1592 *nit = loop->nb_iterations_estimate;
1593 }
1594
1595 return true;
1596 }
1597
1598 /* Similar to estimated_loop_iterations, but returns the estimate only
1599 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1600 on the number of iterations of LOOP could not be derived, returns -1. */
1601
1602 HOST_WIDE_INT
1603 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1604 {
1605 double_int nit;
1606 HOST_WIDE_INT hwi_nit;
1607
1608 if (!estimated_loop_iterations (loop, conservative, &nit))
1609 return -1;
1610
1611 if (!double_int_fits_in_shwi_p (nit))
1612 return -1;
1613 hwi_nit = double_int_to_shwi (nit);
1614
1615 return hwi_nit < 0 ? -1 : hwi_nit;
1616 }
1617
1618 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1619 and only if it fits to the int type. If this is not the case, or the
1620 estimate on the number of iterations of LOOP could not be derived, returns
1621 chrec_dont_know. */
1622
1623 static tree
1624 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1625 {
1626 double_int nit;
1627 tree type;
1628
1629 if (!estimated_loop_iterations (loop, conservative, &nit))
1630 return chrec_dont_know;
1631
1632 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1633 if (!double_int_fits_to_tree_p (type, nit))
1634 return chrec_dont_know;
1635
1636 return double_int_to_tree (type, nit);
1637 }
1638
1639 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1640 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1641 *OVERLAPS_B are initialized to the functions that describe the
1642 relation between the elements accessed twice by CHREC_A and
1643 CHREC_B. For k >= 0, the following property is verified:
1644
1645 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1646
1647 static void
1648 analyze_siv_subscript_cst_affine (tree chrec_a,
1649 tree chrec_b,
1650 conflict_function **overlaps_a,
1651 conflict_function **overlaps_b,
1652 tree *last_conflicts)
1653 {
1654 bool value0, value1, value2;
1655 tree type, difference, tmp;
1656
1657 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1658 chrec_a = chrec_convert (type, chrec_a, NULL);
1659 chrec_b = chrec_convert (type, chrec_b, NULL);
1660 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1661
1662 if (!chrec_is_positive (initial_condition (difference), &value0))
1663 {
1664 if (dump_file && (dump_flags & TDF_DETAILS))
1665 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1666
1667 dependence_stats.num_siv_unimplemented++;
1668 *overlaps_a = conflict_fn_not_known ();
1669 *overlaps_b = conflict_fn_not_known ();
1670 *last_conflicts = chrec_dont_know;
1671 return;
1672 }
1673 else
1674 {
1675 if (value0 == false)
1676 {
1677 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1678 {
1679 if (dump_file && (dump_flags & TDF_DETAILS))
1680 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1681
1682 *overlaps_a = conflict_fn_not_known ();
1683 *overlaps_b = conflict_fn_not_known ();
1684 *last_conflicts = chrec_dont_know;
1685 dependence_stats.num_siv_unimplemented++;
1686 return;
1687 }
1688 else
1689 {
1690 if (value1 == true)
1691 {
1692 /* Example:
1693 chrec_a = 12
1694 chrec_b = {10, +, 1}
1695 */
1696
1697 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1698 {
1699 HOST_WIDE_INT numiter;
1700 struct loop *loop = get_chrec_loop (chrec_b);
1701
1702 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1703 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1704 fold_build1 (ABS_EXPR, type, difference),
1705 CHREC_RIGHT (chrec_b));
1706 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1707 *last_conflicts = integer_one_node;
1708
1709
1710 /* Perform weak-zero siv test to see if overlap is
1711 outside the loop bounds. */
1712 numiter = estimated_loop_iterations_int (loop, false);
1713
1714 if (numiter >= 0
1715 && compare_tree_int (tmp, numiter) > 0)
1716 {
1717 free_conflict_function (*overlaps_a);
1718 free_conflict_function (*overlaps_b);
1719 *overlaps_a = conflict_fn_no_dependence ();
1720 *overlaps_b = conflict_fn_no_dependence ();
1721 *last_conflicts = integer_zero_node;
1722 dependence_stats.num_siv_independent++;
1723 return;
1724 }
1725 dependence_stats.num_siv_dependent++;
1726 return;
1727 }
1728
1729 /* When the step does not divide the difference, there are
1730 no overlaps. */
1731 else
1732 {
1733 *overlaps_a = conflict_fn_no_dependence ();
1734 *overlaps_b = conflict_fn_no_dependence ();
1735 *last_conflicts = integer_zero_node;
1736 dependence_stats.num_siv_independent++;
1737 return;
1738 }
1739 }
1740
1741 else
1742 {
1743 /* Example:
1744 chrec_a = 12
1745 chrec_b = {10, +, -1}
1746
1747 In this case, chrec_a will not overlap with chrec_b. */
1748 *overlaps_a = conflict_fn_no_dependence ();
1749 *overlaps_b = conflict_fn_no_dependence ();
1750 *last_conflicts = integer_zero_node;
1751 dependence_stats.num_siv_independent++;
1752 return;
1753 }
1754 }
1755 }
1756 else
1757 {
1758 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1759 {
1760 if (dump_file && (dump_flags & TDF_DETAILS))
1761 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1762
1763 *overlaps_a = conflict_fn_not_known ();
1764 *overlaps_b = conflict_fn_not_known ();
1765 *last_conflicts = chrec_dont_know;
1766 dependence_stats.num_siv_unimplemented++;
1767 return;
1768 }
1769 else
1770 {
1771 if (value2 == false)
1772 {
1773 /* Example:
1774 chrec_a = 3
1775 chrec_b = {10, +, -1}
1776 */
1777 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1778 {
1779 HOST_WIDE_INT numiter;
1780 struct loop *loop = get_chrec_loop (chrec_b);
1781
1782 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1783 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1784 CHREC_RIGHT (chrec_b));
1785 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1786 *last_conflicts = integer_one_node;
1787
1788 /* Perform weak-zero siv test to see if overlap is
1789 outside the loop bounds. */
1790 numiter = estimated_loop_iterations_int (loop, false);
1791
1792 if (numiter >= 0
1793 && compare_tree_int (tmp, numiter) > 0)
1794 {
1795 free_conflict_function (*overlaps_a);
1796 free_conflict_function (*overlaps_b);
1797 *overlaps_a = conflict_fn_no_dependence ();
1798 *overlaps_b = conflict_fn_no_dependence ();
1799 *last_conflicts = integer_zero_node;
1800 dependence_stats.num_siv_independent++;
1801 return;
1802 }
1803 dependence_stats.num_siv_dependent++;
1804 return;
1805 }
1806
1807 /* When the step does not divide the difference, there
1808 are no overlaps. */
1809 else
1810 {
1811 *overlaps_a = conflict_fn_no_dependence ();
1812 *overlaps_b = conflict_fn_no_dependence ();
1813 *last_conflicts = integer_zero_node;
1814 dependence_stats.num_siv_independent++;
1815 return;
1816 }
1817 }
1818 else
1819 {
1820 /* Example:
1821 chrec_a = 3
1822 chrec_b = {4, +, 1}
1823
1824 In this case, chrec_a will not overlap with chrec_b. */
1825 *overlaps_a = conflict_fn_no_dependence ();
1826 *overlaps_b = conflict_fn_no_dependence ();
1827 *last_conflicts = integer_zero_node;
1828 dependence_stats.num_siv_independent++;
1829 return;
1830 }
1831 }
1832 }
1833 }
1834 }
1835
1836 /* Helper recursive function for initializing the matrix A. Returns
1837 the initial value of CHREC. */
1838
1839 static tree
1840 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1841 {
1842 gcc_assert (chrec);
1843
1844 switch (TREE_CODE (chrec))
1845 {
1846 case POLYNOMIAL_CHREC:
1847 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1848
1849 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1850 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1851
1852 case PLUS_EXPR:
1853 case MULT_EXPR:
1854 case MINUS_EXPR:
1855 {
1856 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1857 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1858
1859 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1860 }
1861
1862 case NOP_EXPR:
1863 {
1864 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1865 return chrec_convert (chrec_type (chrec), op, NULL);
1866 }
1867
1868 case BIT_NOT_EXPR:
1869 {
1870 /* Handle ~X as -1 - X. */
1871 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1872 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1873 build_int_cst (TREE_TYPE (chrec), -1), op);
1874 }
1875
1876 case INTEGER_CST:
1877 return chrec;
1878
1879 default:
1880 gcc_unreachable ();
1881 return NULL_TREE;
1882 }
1883 }
1884
1885 #define FLOOR_DIV(x,y) ((x) / (y))
1886
1887 /* Solves the special case of the Diophantine equation:
1888 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1889
1890 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1891 number of iterations that loops X and Y run. The overlaps will be
1892 constructed as evolutions in dimension DIM. */
1893
1894 static void
1895 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1896 affine_fn *overlaps_a,
1897 affine_fn *overlaps_b,
1898 tree *last_conflicts, int dim)
1899 {
1900 if (((step_a > 0 && step_b > 0)
1901 || (step_a < 0 && step_b < 0)))
1902 {
1903 int step_overlaps_a, step_overlaps_b;
1904 int gcd_steps_a_b, last_conflict, tau2;
1905
1906 gcd_steps_a_b = gcd (step_a, step_b);
1907 step_overlaps_a = step_b / gcd_steps_a_b;
1908 step_overlaps_b = step_a / gcd_steps_a_b;
1909
1910 if (niter > 0)
1911 {
1912 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1913 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1914 last_conflict = tau2;
1915 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1916 }
1917 else
1918 *last_conflicts = chrec_dont_know;
1919
1920 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1921 build_int_cst (NULL_TREE,
1922 step_overlaps_a));
1923 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1924 build_int_cst (NULL_TREE,
1925 step_overlaps_b));
1926 }
1927
1928 else
1929 {
1930 *overlaps_a = affine_fn_cst (integer_zero_node);
1931 *overlaps_b = affine_fn_cst (integer_zero_node);
1932 *last_conflicts = integer_zero_node;
1933 }
1934 }
1935
1936 /* Solves the special case of a Diophantine equation where CHREC_A is
1937 an affine bivariate function, and CHREC_B is an affine univariate
1938 function. For example,
1939
1940 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1941
1942 has the following overlapping functions:
1943
1944 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1945 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1946 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1947
1948 FORNOW: This is a specialized implementation for a case occurring in
1949 a common benchmark. Implement the general algorithm. */
1950
1951 static void
1952 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1953 conflict_function **overlaps_a,
1954 conflict_function **overlaps_b,
1955 tree *last_conflicts)
1956 {
1957 bool xz_p, yz_p, xyz_p;
1958 int step_x, step_y, step_z;
1959 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1960 affine_fn overlaps_a_xz, overlaps_b_xz;
1961 affine_fn overlaps_a_yz, overlaps_b_yz;
1962 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1963 affine_fn ova1, ova2, ovb;
1964 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1965
1966 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1967 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1968 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1969
1970 niter_x =
1971 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1972 false);
1973 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1974 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1975
1976 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1977 {
1978 if (dump_file && (dump_flags & TDF_DETAILS))
1979 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1980
1981 *overlaps_a = conflict_fn_not_known ();
1982 *overlaps_b = conflict_fn_not_known ();
1983 *last_conflicts = chrec_dont_know;
1984 return;
1985 }
1986
1987 niter = MIN (niter_x, niter_z);
1988 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1989 &overlaps_a_xz,
1990 &overlaps_b_xz,
1991 &last_conflicts_xz, 1);
1992 niter = MIN (niter_y, niter_z);
1993 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1994 &overlaps_a_yz,
1995 &overlaps_b_yz,
1996 &last_conflicts_yz, 2);
1997 niter = MIN (niter_x, niter_z);
1998 niter = MIN (niter_y, niter);
1999 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2000 &overlaps_a_xyz,
2001 &overlaps_b_xyz,
2002 &last_conflicts_xyz, 3);
2003
2004 xz_p = !integer_zerop (last_conflicts_xz);
2005 yz_p = !integer_zerop (last_conflicts_yz);
2006 xyz_p = !integer_zerop (last_conflicts_xyz);
2007
2008 if (xz_p || yz_p || xyz_p)
2009 {
2010 ova1 = affine_fn_cst (integer_zero_node);
2011 ova2 = affine_fn_cst (integer_zero_node);
2012 ovb = affine_fn_cst (integer_zero_node);
2013 if (xz_p)
2014 {
2015 affine_fn t0 = ova1;
2016 affine_fn t2 = ovb;
2017
2018 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2019 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2020 affine_fn_free (t0);
2021 affine_fn_free (t2);
2022 *last_conflicts = last_conflicts_xz;
2023 }
2024 if (yz_p)
2025 {
2026 affine_fn t0 = ova2;
2027 affine_fn t2 = ovb;
2028
2029 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2030 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2031 affine_fn_free (t0);
2032 affine_fn_free (t2);
2033 *last_conflicts = last_conflicts_yz;
2034 }
2035 if (xyz_p)
2036 {
2037 affine_fn t0 = ova1;
2038 affine_fn t2 = ova2;
2039 affine_fn t4 = ovb;
2040
2041 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2042 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2043 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2044 affine_fn_free (t0);
2045 affine_fn_free (t2);
2046 affine_fn_free (t4);
2047 *last_conflicts = last_conflicts_xyz;
2048 }
2049 *overlaps_a = conflict_fn (2, ova1, ova2);
2050 *overlaps_b = conflict_fn (1, ovb);
2051 }
2052 else
2053 {
2054 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2055 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2056 *last_conflicts = integer_zero_node;
2057 }
2058
2059 affine_fn_free (overlaps_a_xz);
2060 affine_fn_free (overlaps_b_xz);
2061 affine_fn_free (overlaps_a_yz);
2062 affine_fn_free (overlaps_b_yz);
2063 affine_fn_free (overlaps_a_xyz);
2064 affine_fn_free (overlaps_b_xyz);
2065 }
2066
2067 /* Determines the overlapping elements due to accesses CHREC_A and
2068 CHREC_B, that are affine functions. This function cannot handle
2069 symbolic evolution functions, ie. when initial conditions are
2070 parameters, because it uses lambda matrices of integers. */
2071
2072 static void
2073 analyze_subscript_affine_affine (tree chrec_a,
2074 tree chrec_b,
2075 conflict_function **overlaps_a,
2076 conflict_function **overlaps_b,
2077 tree *last_conflicts)
2078 {
2079 unsigned nb_vars_a, nb_vars_b, dim;
2080 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2081 lambda_matrix A, U, S;
2082 struct obstack scratch_obstack;
2083
2084 if (eq_evolutions_p (chrec_a, chrec_b))
2085 {
2086 /* The accessed index overlaps for each iteration in the
2087 loop. */
2088 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2089 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2090 *last_conflicts = chrec_dont_know;
2091 return;
2092 }
2093 if (dump_file && (dump_flags & TDF_DETAILS))
2094 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2095
2096 /* For determining the initial intersection, we have to solve a
2097 Diophantine equation. This is the most time consuming part.
2098
2099 For answering to the question: "Is there a dependence?" we have
2100 to prove that there exists a solution to the Diophantine
2101 equation, and that the solution is in the iteration domain,
2102 i.e. the solution is positive or zero, and that the solution
2103 happens before the upper bound loop.nb_iterations. Otherwise
2104 there is no dependence. This function outputs a description of
2105 the iterations that hold the intersections. */
2106
2107 nb_vars_a = nb_vars_in_chrec (chrec_a);
2108 nb_vars_b = nb_vars_in_chrec (chrec_b);
2109
2110 gcc_obstack_init (&scratch_obstack);
2111
2112 dim = nb_vars_a + nb_vars_b;
2113 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2114 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2115 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2116
2117 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2118 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2119 gamma = init_b - init_a;
2120
2121 /* Don't do all the hard work of solving the Diophantine equation
2122 when we already know the solution: for example,
2123 | {3, +, 1}_1
2124 | {3, +, 4}_2
2125 | gamma = 3 - 3 = 0.
2126 Then the first overlap occurs during the first iterations:
2127 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2128 */
2129 if (gamma == 0)
2130 {
2131 if (nb_vars_a == 1 && nb_vars_b == 1)
2132 {
2133 HOST_WIDE_INT step_a, step_b;
2134 HOST_WIDE_INT niter, niter_a, niter_b;
2135 affine_fn ova, ovb;
2136
2137 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2138 false);
2139 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2140 false);
2141 niter = MIN (niter_a, niter_b);
2142 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2143 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2144
2145 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2146 &ova, &ovb,
2147 last_conflicts, 1);
2148 *overlaps_a = conflict_fn (1, ova);
2149 *overlaps_b = conflict_fn (1, ovb);
2150 }
2151
2152 else if (nb_vars_a == 2 && nb_vars_b == 1)
2153 compute_overlap_steps_for_affine_1_2
2154 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2155
2156 else if (nb_vars_a == 1 && nb_vars_b == 2)
2157 compute_overlap_steps_for_affine_1_2
2158 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2159
2160 else
2161 {
2162 if (dump_file && (dump_flags & TDF_DETAILS))
2163 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2164 *overlaps_a = conflict_fn_not_known ();
2165 *overlaps_b = conflict_fn_not_known ();
2166 *last_conflicts = chrec_dont_know;
2167 }
2168 goto end_analyze_subs_aa;
2169 }
2170
2171 /* U.A = S */
2172 lambda_matrix_right_hermite (A, dim, 1, S, U);
2173
2174 if (S[0][0] < 0)
2175 {
2176 S[0][0] *= -1;
2177 lambda_matrix_row_negate (U, dim, 0);
2178 }
2179 gcd_alpha_beta = S[0][0];
2180
2181 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2182 but that is a quite strange case. Instead of ICEing, answer
2183 don't know. */
2184 if (gcd_alpha_beta == 0)
2185 {
2186 *overlaps_a = conflict_fn_not_known ();
2187 *overlaps_b = conflict_fn_not_known ();
2188 *last_conflicts = chrec_dont_know;
2189 goto end_analyze_subs_aa;
2190 }
2191
2192 /* The classic "gcd-test". */
2193 if (!int_divides_p (gcd_alpha_beta, gamma))
2194 {
2195 /* The "gcd-test" has determined that there is no integer
2196 solution, i.e. there is no dependence. */
2197 *overlaps_a = conflict_fn_no_dependence ();
2198 *overlaps_b = conflict_fn_no_dependence ();
2199 *last_conflicts = integer_zero_node;
2200 }
2201
2202 /* Both access functions are univariate. This includes SIV and MIV cases. */
2203 else if (nb_vars_a == 1 && nb_vars_b == 1)
2204 {
2205 /* Both functions should have the same evolution sign. */
2206 if (((A[0][0] > 0 && -A[1][0] > 0)
2207 || (A[0][0] < 0 && -A[1][0] < 0)))
2208 {
2209 /* The solutions are given by:
2210 |
2211 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2212 | [u21 u22] [y0]
2213
2214 For a given integer t. Using the following variables,
2215
2216 | i0 = u11 * gamma / gcd_alpha_beta
2217 | j0 = u12 * gamma / gcd_alpha_beta
2218 | i1 = u21
2219 | j1 = u22
2220
2221 the solutions are:
2222
2223 | x0 = i0 + i1 * t,
2224 | y0 = j0 + j1 * t. */
2225 HOST_WIDE_INT i0, j0, i1, j1;
2226
2227 i0 = U[0][0] * gamma / gcd_alpha_beta;
2228 j0 = U[0][1] * gamma / gcd_alpha_beta;
2229 i1 = U[1][0];
2230 j1 = U[1][1];
2231
2232 if ((i1 == 0 && i0 < 0)
2233 || (j1 == 0 && j0 < 0))
2234 {
2235 /* There is no solution.
2236 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2237 falls in here, but for the moment we don't look at the
2238 upper bound of the iteration domain. */
2239 *overlaps_a = conflict_fn_no_dependence ();
2240 *overlaps_b = conflict_fn_no_dependence ();
2241 *last_conflicts = integer_zero_node;
2242 goto end_analyze_subs_aa;
2243 }
2244
2245 if (i1 > 0 && j1 > 0)
2246 {
2247 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2248 (get_chrec_loop (chrec_a), false);
2249 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2250 (get_chrec_loop (chrec_b), false);
2251 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2252
2253 /* (X0, Y0) is a solution of the Diophantine equation:
2254 "chrec_a (X0) = chrec_b (Y0)". */
2255 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2256 CEIL (-j0, j1));
2257 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2258 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2259
2260 /* (X1, Y1) is the smallest positive solution of the eq
2261 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2262 first conflict occurs. */
2263 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2264 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2265 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2266
2267 if (niter > 0)
2268 {
2269 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2270 FLOOR_DIV (niter - j0, j1));
2271 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2272
2273 /* If the overlap occurs outside of the bounds of the
2274 loop, there is no dependence. */
2275 if (x1 >= niter || y1 >= niter)
2276 {
2277 *overlaps_a = conflict_fn_no_dependence ();
2278 *overlaps_b = conflict_fn_no_dependence ();
2279 *last_conflicts = integer_zero_node;
2280 goto end_analyze_subs_aa;
2281 }
2282 else
2283 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2284 }
2285 else
2286 *last_conflicts = chrec_dont_know;
2287
2288 *overlaps_a
2289 = conflict_fn (1,
2290 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2291 1,
2292 build_int_cst (NULL_TREE, i1)));
2293 *overlaps_b
2294 = conflict_fn (1,
2295 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2296 1,
2297 build_int_cst (NULL_TREE, j1)));
2298 }
2299 else
2300 {
2301 /* FIXME: For the moment, the upper bound of the
2302 iteration domain for i and j is not checked. */
2303 if (dump_file && (dump_flags & TDF_DETAILS))
2304 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2305 *overlaps_a = conflict_fn_not_known ();
2306 *overlaps_b = conflict_fn_not_known ();
2307 *last_conflicts = chrec_dont_know;
2308 }
2309 }
2310 else
2311 {
2312 if (dump_file && (dump_flags & TDF_DETAILS))
2313 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2314 *overlaps_a = conflict_fn_not_known ();
2315 *overlaps_b = conflict_fn_not_known ();
2316 *last_conflicts = chrec_dont_know;
2317 }
2318 }
2319 else
2320 {
2321 if (dump_file && (dump_flags & TDF_DETAILS))
2322 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2323 *overlaps_a = conflict_fn_not_known ();
2324 *overlaps_b = conflict_fn_not_known ();
2325 *last_conflicts = chrec_dont_know;
2326 }
2327
2328 end_analyze_subs_aa:
2329 obstack_free (&scratch_obstack, NULL);
2330 if (dump_file && (dump_flags & TDF_DETAILS))
2331 {
2332 fprintf (dump_file, " (overlaps_a = ");
2333 dump_conflict_function (dump_file, *overlaps_a);
2334 fprintf (dump_file, ")\n (overlaps_b = ");
2335 dump_conflict_function (dump_file, *overlaps_b);
2336 fprintf (dump_file, ")\n");
2337 fprintf (dump_file, ")\n");
2338 }
2339 }
2340
2341 /* Returns true when analyze_subscript_affine_affine can be used for
2342 determining the dependence relation between chrec_a and chrec_b,
2343 that contain symbols. This function modifies chrec_a and chrec_b
2344 such that the analysis result is the same, and such that they don't
2345 contain symbols, and then can safely be passed to the analyzer.
2346
2347 Example: The analysis of the following tuples of evolutions produce
2348 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2349 vs. {0, +, 1}_1
2350
2351 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2352 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2353 */
2354
2355 static bool
2356 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2357 {
2358 tree diff, type, left_a, left_b, right_b;
2359
2360 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2361 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2362 /* FIXME: For the moment not handled. Might be refined later. */
2363 return false;
2364
2365 type = chrec_type (*chrec_a);
2366 left_a = CHREC_LEFT (*chrec_a);
2367 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2368 diff = chrec_fold_minus (type, left_a, left_b);
2369
2370 if (!evolution_function_is_constant_p (diff))
2371 return false;
2372
2373 if (dump_file && (dump_flags & TDF_DETAILS))
2374 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2375
2376 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2377 diff, CHREC_RIGHT (*chrec_a));
2378 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2379 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2380 build_int_cst (type, 0),
2381 right_b);
2382 return true;
2383 }
2384
2385 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2386 *OVERLAPS_B are initialized to the functions that describe the
2387 relation between the elements accessed twice by CHREC_A and
2388 CHREC_B. For k >= 0, the following property is verified:
2389
2390 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2391
2392 static void
2393 analyze_siv_subscript (tree chrec_a,
2394 tree chrec_b,
2395 conflict_function **overlaps_a,
2396 conflict_function **overlaps_b,
2397 tree *last_conflicts,
2398 int loop_nest_num)
2399 {
2400 dependence_stats.num_siv++;
2401
2402 if (dump_file && (dump_flags & TDF_DETAILS))
2403 fprintf (dump_file, "(analyze_siv_subscript \n");
2404
2405 if (evolution_function_is_constant_p (chrec_a)
2406 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2407 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2408 overlaps_a, overlaps_b, last_conflicts);
2409
2410 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2411 && evolution_function_is_constant_p (chrec_b))
2412 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2413 overlaps_b, overlaps_a, last_conflicts);
2414
2415 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2416 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2417 {
2418 if (!chrec_contains_symbols (chrec_a)
2419 && !chrec_contains_symbols (chrec_b))
2420 {
2421 analyze_subscript_affine_affine (chrec_a, chrec_b,
2422 overlaps_a, overlaps_b,
2423 last_conflicts);
2424
2425 if (CF_NOT_KNOWN_P (*overlaps_a)
2426 || CF_NOT_KNOWN_P (*overlaps_b))
2427 dependence_stats.num_siv_unimplemented++;
2428 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2429 || CF_NO_DEPENDENCE_P (*overlaps_b))
2430 dependence_stats.num_siv_independent++;
2431 else
2432 dependence_stats.num_siv_dependent++;
2433 }
2434 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2435 &chrec_b))
2436 {
2437 analyze_subscript_affine_affine (chrec_a, chrec_b,
2438 overlaps_a, overlaps_b,
2439 last_conflicts);
2440
2441 if (CF_NOT_KNOWN_P (*overlaps_a)
2442 || CF_NOT_KNOWN_P (*overlaps_b))
2443 dependence_stats.num_siv_unimplemented++;
2444 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2445 || CF_NO_DEPENDENCE_P (*overlaps_b))
2446 dependence_stats.num_siv_independent++;
2447 else
2448 dependence_stats.num_siv_dependent++;
2449 }
2450 else
2451 goto siv_subscript_dontknow;
2452 }
2453
2454 else
2455 {
2456 siv_subscript_dontknow:;
2457 if (dump_file && (dump_flags & TDF_DETAILS))
2458 fprintf (dump_file, "siv test failed: unimplemented.\n");
2459 *overlaps_a = conflict_fn_not_known ();
2460 *overlaps_b = conflict_fn_not_known ();
2461 *last_conflicts = chrec_dont_know;
2462 dependence_stats.num_siv_unimplemented++;
2463 }
2464
2465 if (dump_file && (dump_flags & TDF_DETAILS))
2466 fprintf (dump_file, ")\n");
2467 }
2468
2469 /* Returns false if we can prove that the greatest common divisor of the steps
2470 of CHREC does not divide CST, false otherwise. */
2471
2472 static bool
2473 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2474 {
2475 HOST_WIDE_INT cd = 0, val;
2476 tree step;
2477
2478 if (!host_integerp (cst, 0))
2479 return true;
2480 val = tree_low_cst (cst, 0);
2481
2482 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2483 {
2484 step = CHREC_RIGHT (chrec);
2485 if (!host_integerp (step, 0))
2486 return true;
2487 cd = gcd (cd, tree_low_cst (step, 0));
2488 chrec = CHREC_LEFT (chrec);
2489 }
2490
2491 return val % cd == 0;
2492 }
2493
2494 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2495 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2496 functions that describe the relation between the elements accessed
2497 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2498 is verified:
2499
2500 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2501
2502 static void
2503 analyze_miv_subscript (tree chrec_a,
2504 tree chrec_b,
2505 conflict_function **overlaps_a,
2506 conflict_function **overlaps_b,
2507 tree *last_conflicts,
2508 struct loop *loop_nest)
2509 {
2510 /* FIXME: This is a MIV subscript, not yet handled.
2511 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2512 (A[i] vs. A[j]).
2513
2514 In the SIV test we had to solve a Diophantine equation with two
2515 variables. In the MIV case we have to solve a Diophantine
2516 equation with 2*n variables (if the subscript uses n IVs).
2517 */
2518 tree type, difference;
2519
2520 dependence_stats.num_miv++;
2521 if (dump_file && (dump_flags & TDF_DETAILS))
2522 fprintf (dump_file, "(analyze_miv_subscript \n");
2523
2524 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2525 chrec_a = chrec_convert (type, chrec_a, NULL);
2526 chrec_b = chrec_convert (type, chrec_b, NULL);
2527 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2528
2529 if (eq_evolutions_p (chrec_a, chrec_b))
2530 {
2531 /* Access functions are the same: all the elements are accessed
2532 in the same order. */
2533 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2534 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2535 *last_conflicts = estimated_loop_iterations_tree
2536 (get_chrec_loop (chrec_a), true);
2537 dependence_stats.num_miv_dependent++;
2538 }
2539
2540 else if (evolution_function_is_constant_p (difference)
2541 /* For the moment, the following is verified:
2542 evolution_function_is_affine_multivariate_p (chrec_a,
2543 loop_nest->num) */
2544 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2545 {
2546 /* testsuite/.../ssa-chrec-33.c
2547 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2548
2549 The difference is 1, and all the evolution steps are multiples
2550 of 2, consequently there are no overlapping elements. */
2551 *overlaps_a = conflict_fn_no_dependence ();
2552 *overlaps_b = conflict_fn_no_dependence ();
2553 *last_conflicts = integer_zero_node;
2554 dependence_stats.num_miv_independent++;
2555 }
2556
2557 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2558 && !chrec_contains_symbols (chrec_a)
2559 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2560 && !chrec_contains_symbols (chrec_b))
2561 {
2562 /* testsuite/.../ssa-chrec-35.c
2563 {0, +, 1}_2 vs. {0, +, 1}_3
2564 the overlapping elements are respectively located at iterations:
2565 {0, +, 1}_x and {0, +, 1}_x,
2566 in other words, we have the equality:
2567 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2568
2569 Other examples:
2570 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2571 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2572
2573 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2574 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2575 */
2576 analyze_subscript_affine_affine (chrec_a, chrec_b,
2577 overlaps_a, overlaps_b, last_conflicts);
2578
2579 if (CF_NOT_KNOWN_P (*overlaps_a)
2580 || CF_NOT_KNOWN_P (*overlaps_b))
2581 dependence_stats.num_miv_unimplemented++;
2582 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2583 || CF_NO_DEPENDENCE_P (*overlaps_b))
2584 dependence_stats.num_miv_independent++;
2585 else
2586 dependence_stats.num_miv_dependent++;
2587 }
2588
2589 else
2590 {
2591 /* When the analysis is too difficult, answer "don't know". */
2592 if (dump_file && (dump_flags & TDF_DETAILS))
2593 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2594
2595 *overlaps_a = conflict_fn_not_known ();
2596 *overlaps_b = conflict_fn_not_known ();
2597 *last_conflicts = chrec_dont_know;
2598 dependence_stats.num_miv_unimplemented++;
2599 }
2600
2601 if (dump_file && (dump_flags & TDF_DETAILS))
2602 fprintf (dump_file, ")\n");
2603 }
2604
2605 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2606 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2607 OVERLAP_ITERATIONS_B are initialized with two functions that
2608 describe the iterations that contain conflicting elements.
2609
2610 Remark: For an integer k >= 0, the following equality is true:
2611
2612 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2613 */
2614
2615 static void
2616 analyze_overlapping_iterations (tree chrec_a,
2617 tree chrec_b,
2618 conflict_function **overlap_iterations_a,
2619 conflict_function **overlap_iterations_b,
2620 tree *last_conflicts, struct loop *loop_nest)
2621 {
2622 unsigned int lnn = loop_nest->num;
2623
2624 dependence_stats.num_subscript_tests++;
2625
2626 if (dump_file && (dump_flags & TDF_DETAILS))
2627 {
2628 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2629 fprintf (dump_file, " (chrec_a = ");
2630 print_generic_expr (dump_file, chrec_a, 0);
2631 fprintf (dump_file, ")\n (chrec_b = ");
2632 print_generic_expr (dump_file, chrec_b, 0);
2633 fprintf (dump_file, ")\n");
2634 }
2635
2636 if (chrec_a == NULL_TREE
2637 || chrec_b == NULL_TREE
2638 || chrec_contains_undetermined (chrec_a)
2639 || chrec_contains_undetermined (chrec_b))
2640 {
2641 dependence_stats.num_subscript_undetermined++;
2642
2643 *overlap_iterations_a = conflict_fn_not_known ();
2644 *overlap_iterations_b = conflict_fn_not_known ();
2645 }
2646
2647 /* If they are the same chrec, and are affine, they overlap
2648 on every iteration. */
2649 else if (eq_evolutions_p (chrec_a, chrec_b)
2650 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2651 || operand_equal_p (chrec_a, chrec_b, 0)))
2652 {
2653 dependence_stats.num_same_subscript_function++;
2654 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2655 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2656 *last_conflicts = chrec_dont_know;
2657 }
2658
2659 /* If they aren't the same, and aren't affine, we can't do anything
2660 yet. */
2661 else if ((chrec_contains_symbols (chrec_a)
2662 || chrec_contains_symbols (chrec_b))
2663 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2664 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2665 {
2666 dependence_stats.num_subscript_undetermined++;
2667 *overlap_iterations_a = conflict_fn_not_known ();
2668 *overlap_iterations_b = conflict_fn_not_known ();
2669 }
2670
2671 else if (ziv_subscript_p (chrec_a, chrec_b))
2672 analyze_ziv_subscript (chrec_a, chrec_b,
2673 overlap_iterations_a, overlap_iterations_b,
2674 last_conflicts);
2675
2676 else if (siv_subscript_p (chrec_a, chrec_b))
2677 analyze_siv_subscript (chrec_a, chrec_b,
2678 overlap_iterations_a, overlap_iterations_b,
2679 last_conflicts, lnn);
2680
2681 else
2682 analyze_miv_subscript (chrec_a, chrec_b,
2683 overlap_iterations_a, overlap_iterations_b,
2684 last_conflicts, loop_nest);
2685
2686 if (dump_file && (dump_flags & TDF_DETAILS))
2687 {
2688 fprintf (dump_file, " (overlap_iterations_a = ");
2689 dump_conflict_function (dump_file, *overlap_iterations_a);
2690 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2691 dump_conflict_function (dump_file, *overlap_iterations_b);
2692 fprintf (dump_file, ")\n");
2693 fprintf (dump_file, ")\n");
2694 }
2695 }
2696
2697 /* Helper function for uniquely inserting distance vectors. */
2698
2699 static void
2700 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2701 {
2702 unsigned i;
2703 lambda_vector v;
2704
2705 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2706 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2707 return;
2708
2709 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2710 }
2711
2712 /* Helper function for uniquely inserting direction vectors. */
2713
2714 static void
2715 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2716 {
2717 unsigned i;
2718 lambda_vector v;
2719
2720 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2721 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2722 return;
2723
2724 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2725 }
2726
2727 /* Add a distance of 1 on all the loops outer than INDEX. If we
2728 haven't yet determined a distance for this outer loop, push a new
2729 distance vector composed of the previous distance, and a distance
2730 of 1 for this outer loop. Example:
2731
2732 | loop_1
2733 | loop_2
2734 | A[10]
2735 | endloop_2
2736 | endloop_1
2737
2738 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2739 save (0, 1), then we have to save (1, 0). */
2740
2741 static void
2742 add_outer_distances (struct data_dependence_relation *ddr,
2743 lambda_vector dist_v, int index)
2744 {
2745 /* For each outer loop where init_v is not set, the accesses are
2746 in dependence of distance 1 in the loop. */
2747 while (--index >= 0)
2748 {
2749 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2750 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2751 save_v[index] = 1;
2752 save_dist_v (ddr, save_v);
2753 }
2754 }
2755
2756 /* Return false when fail to represent the data dependence as a
2757 distance vector. INIT_B is set to true when a component has been
2758 added to the distance vector DIST_V. INDEX_CARRY is then set to
2759 the index in DIST_V that carries the dependence. */
2760
2761 static bool
2762 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2763 struct data_reference *ddr_a,
2764 struct data_reference *ddr_b,
2765 lambda_vector dist_v, bool *init_b,
2766 int *index_carry)
2767 {
2768 unsigned i;
2769 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2770
2771 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2772 {
2773 tree access_fn_a, access_fn_b;
2774 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2775
2776 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2777 {
2778 non_affine_dependence_relation (ddr);
2779 return false;
2780 }
2781
2782 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2783 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2784
2785 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2786 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2787 {
2788 int dist, index;
2789 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2790 DDR_LOOP_NEST (ddr));
2791 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2792 DDR_LOOP_NEST (ddr));
2793
2794 /* The dependence is carried by the outermost loop. Example:
2795 | loop_1
2796 | A[{4, +, 1}_1]
2797 | loop_2
2798 | A[{5, +, 1}_2]
2799 | endloop_2
2800 | endloop_1
2801 In this case, the dependence is carried by loop_1. */
2802 index = index_a < index_b ? index_a : index_b;
2803 *index_carry = MIN (index, *index_carry);
2804
2805 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2806 {
2807 non_affine_dependence_relation (ddr);
2808 return false;
2809 }
2810
2811 dist = int_cst_value (SUB_DISTANCE (subscript));
2812
2813 /* This is the subscript coupling test. If we have already
2814 recorded a distance for this loop (a distance coming from
2815 another subscript), it should be the same. For example,
2816 in the following code, there is no dependence:
2817
2818 | loop i = 0, N, 1
2819 | T[i+1][i] = ...
2820 | ... = T[i][i]
2821 | endloop
2822 */
2823 if (init_v[index] != 0 && dist_v[index] != dist)
2824 {
2825 finalize_ddr_dependent (ddr, chrec_known);
2826 return false;
2827 }
2828
2829 dist_v[index] = dist;
2830 init_v[index] = 1;
2831 *init_b = true;
2832 }
2833 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2834 {
2835 /* This can be for example an affine vs. constant dependence
2836 (T[i] vs. T[3]) that is not an affine dependence and is
2837 not representable as a distance vector. */
2838 non_affine_dependence_relation (ddr);
2839 return false;
2840 }
2841 }
2842
2843 return true;
2844 }
2845
2846 /* Return true when the DDR contains only constant access functions. */
2847
2848 static bool
2849 constant_access_functions (const struct data_dependence_relation *ddr)
2850 {
2851 unsigned i;
2852
2853 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2854 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2855 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2856 return false;
2857
2858 return true;
2859 }
2860
2861 /* Helper function for the case where DDR_A and DDR_B are the same
2862 multivariate access function with a constant step. For an example
2863 see pr34635-1.c. */
2864
2865 static void
2866 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2867 {
2868 int x_1, x_2;
2869 tree c_1 = CHREC_LEFT (c_2);
2870 tree c_0 = CHREC_LEFT (c_1);
2871 lambda_vector dist_v;
2872 int v1, v2, cd;
2873
2874 /* Polynomials with more than 2 variables are not handled yet. When
2875 the evolution steps are parameters, it is not possible to
2876 represent the dependence using classical distance vectors. */
2877 if (TREE_CODE (c_0) != INTEGER_CST
2878 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2879 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2880 {
2881 DDR_AFFINE_P (ddr) = false;
2882 return;
2883 }
2884
2885 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2886 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2887
2888 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2889 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2890 v1 = int_cst_value (CHREC_RIGHT (c_1));
2891 v2 = int_cst_value (CHREC_RIGHT (c_2));
2892 cd = gcd (v1, v2);
2893 v1 /= cd;
2894 v2 /= cd;
2895
2896 if (v2 < 0)
2897 {
2898 v2 = -v2;
2899 v1 = -v1;
2900 }
2901
2902 dist_v[x_1] = v2;
2903 dist_v[x_2] = -v1;
2904 save_dist_v (ddr, dist_v);
2905
2906 add_outer_distances (ddr, dist_v, x_1);
2907 }
2908
2909 /* Helper function for the case where DDR_A and DDR_B are the same
2910 access functions. */
2911
2912 static void
2913 add_other_self_distances (struct data_dependence_relation *ddr)
2914 {
2915 lambda_vector dist_v;
2916 unsigned i;
2917 int index_carry = DDR_NB_LOOPS (ddr);
2918
2919 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2920 {
2921 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2922
2923 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2924 {
2925 if (!evolution_function_is_univariate_p (access_fun))
2926 {
2927 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2928 {
2929 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2930 return;
2931 }
2932
2933 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
2934
2935 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
2936 add_multivariate_self_dist (ddr, access_fun);
2937 else
2938 /* The evolution step is not constant: it varies in
2939 the outer loop, so this cannot be represented by a
2940 distance vector. For example in pr34635.c the
2941 evolution is {0, +, {0, +, 4}_1}_2. */
2942 DDR_AFFINE_P (ddr) = false;
2943
2944 return;
2945 }
2946
2947 index_carry = MIN (index_carry,
2948 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2949 DDR_LOOP_NEST (ddr)));
2950 }
2951 }
2952
2953 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2954 add_outer_distances (ddr, dist_v, index_carry);
2955 }
2956
2957 static void
2958 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2959 {
2960 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2961
2962 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2963 save_dist_v (ddr, dist_v);
2964 }
2965
2966 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2967 is the case for example when access functions are the same and
2968 equal to a constant, as in:
2969
2970 | loop_1
2971 | A[3] = ...
2972 | ... = A[3]
2973 | endloop_1
2974
2975 in which case the distance vectors are (0) and (1). */
2976
2977 static void
2978 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2979 {
2980 unsigned i, j;
2981
2982 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2983 {
2984 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2985 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2986 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2987
2988 for (j = 0; j < ca->n; j++)
2989 if (affine_function_zero_p (ca->fns[j]))
2990 {
2991 insert_innermost_unit_dist_vector (ddr);
2992 return;
2993 }
2994
2995 for (j = 0; j < cb->n; j++)
2996 if (affine_function_zero_p (cb->fns[j]))
2997 {
2998 insert_innermost_unit_dist_vector (ddr);
2999 return;
3000 }
3001 }
3002 }
3003
3004 /* Compute the classic per loop distance vector. DDR is the data
3005 dependence relation to build a vector from. Return false when fail
3006 to represent the data dependence as a distance vector. */
3007
3008 static bool
3009 build_classic_dist_vector (struct data_dependence_relation *ddr,
3010 struct loop *loop_nest)
3011 {
3012 bool init_b = false;
3013 int index_carry = DDR_NB_LOOPS (ddr);
3014 lambda_vector dist_v;
3015
3016 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3017 return false;
3018
3019 if (same_access_functions (ddr))
3020 {
3021 /* Save the 0 vector. */
3022 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3023 save_dist_v (ddr, dist_v);
3024
3025 if (constant_access_functions (ddr))
3026 add_distance_for_zero_overlaps (ddr);
3027
3028 if (DDR_NB_LOOPS (ddr) > 1)
3029 add_other_self_distances (ddr);
3030
3031 return true;
3032 }
3033
3034 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3035 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3036 dist_v, &init_b, &index_carry))
3037 return false;
3038
3039 /* Save the distance vector if we initialized one. */
3040 if (init_b)
3041 {
3042 /* Verify a basic constraint: classic distance vectors should
3043 always be lexicographically positive.
3044
3045 Data references are collected in the order of execution of
3046 the program, thus for the following loop
3047
3048 | for (i = 1; i < 100; i++)
3049 | for (j = 1; j < 100; j++)
3050 | {
3051 | t = T[j+1][i-1]; // A
3052 | T[j][i] = t + 2; // B
3053 | }
3054
3055 references are collected following the direction of the wind:
3056 A then B. The data dependence tests are performed also
3057 following this order, such that we're looking at the distance
3058 separating the elements accessed by A from the elements later
3059 accessed by B. But in this example, the distance returned by
3060 test_dep (A, B) is lexicographically negative (-1, 1), that
3061 means that the access A occurs later than B with respect to
3062 the outer loop, ie. we're actually looking upwind. In this
3063 case we solve test_dep (B, A) looking downwind to the
3064 lexicographically positive solution, that returns the
3065 distance vector (1, -1). */
3066 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3067 {
3068 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3069 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3070 loop_nest))
3071 return false;
3072 compute_subscript_distance (ddr);
3073 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3074 save_v, &init_b, &index_carry))
3075 return false;
3076 save_dist_v (ddr, save_v);
3077 DDR_REVERSED_P (ddr) = true;
3078
3079 /* In this case there is a dependence forward for all the
3080 outer loops:
3081
3082 | for (k = 1; k < 100; k++)
3083 | for (i = 1; i < 100; i++)
3084 | for (j = 1; j < 100; j++)
3085 | {
3086 | t = T[j+1][i-1]; // A
3087 | T[j][i] = t + 2; // B
3088 | }
3089
3090 the vectors are:
3091 (0, 1, -1)
3092 (1, 1, -1)
3093 (1, -1, 1)
3094 */
3095 if (DDR_NB_LOOPS (ddr) > 1)
3096 {
3097 add_outer_distances (ddr, save_v, index_carry);
3098 add_outer_distances (ddr, dist_v, index_carry);
3099 }
3100 }
3101 else
3102 {
3103 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3104 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3105
3106 if (DDR_NB_LOOPS (ddr) > 1)
3107 {
3108 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3109
3110 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3111 DDR_A (ddr), loop_nest))
3112 return false;
3113 compute_subscript_distance (ddr);
3114 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3115 opposite_v, &init_b,
3116 &index_carry))
3117 return false;
3118
3119 save_dist_v (ddr, save_v);
3120 add_outer_distances (ddr, dist_v, index_carry);
3121 add_outer_distances (ddr, opposite_v, index_carry);
3122 }
3123 else
3124 save_dist_v (ddr, save_v);
3125 }
3126 }
3127 else
3128 {
3129 /* There is a distance of 1 on all the outer loops: Example:
3130 there is a dependence of distance 1 on loop_1 for the array A.
3131
3132 | loop_1
3133 | A[5] = ...
3134 | endloop
3135 */
3136 add_outer_distances (ddr, dist_v,
3137 lambda_vector_first_nz (dist_v,
3138 DDR_NB_LOOPS (ddr), 0));
3139 }
3140
3141 if (dump_file && (dump_flags & TDF_DETAILS))
3142 {
3143 unsigned i;
3144
3145 fprintf (dump_file, "(build_classic_dist_vector\n");
3146 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3147 {
3148 fprintf (dump_file, " dist_vector = (");
3149 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3150 DDR_NB_LOOPS (ddr));
3151 fprintf (dump_file, " )\n");
3152 }
3153 fprintf (dump_file, ")\n");
3154 }
3155
3156 return true;
3157 }
3158
3159 /* Return the direction for a given distance.
3160 FIXME: Computing dir this way is suboptimal, since dir can catch
3161 cases that dist is unable to represent. */
3162
3163 static inline enum data_dependence_direction
3164 dir_from_dist (int dist)
3165 {
3166 if (dist > 0)
3167 return dir_positive;
3168 else if (dist < 0)
3169 return dir_negative;
3170 else
3171 return dir_equal;
3172 }
3173
3174 /* Compute the classic per loop direction vector. DDR is the data
3175 dependence relation to build a vector from. */
3176
3177 static void
3178 build_classic_dir_vector (struct data_dependence_relation *ddr)
3179 {
3180 unsigned i, j;
3181 lambda_vector dist_v;
3182
3183 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3184 {
3185 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3186
3187 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3188 dir_v[j] = dir_from_dist (dist_v[j]);
3189
3190 save_dir_v (ddr, dir_v);
3191 }
3192 }
3193
3194 /* Helper function. Returns true when there is a dependence between
3195 data references DRA and DRB. */
3196
3197 static bool
3198 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3199 struct data_reference *dra,
3200 struct data_reference *drb,
3201 struct loop *loop_nest)
3202 {
3203 unsigned int i;
3204 tree last_conflicts;
3205 struct subscript *subscript;
3206
3207 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3208 i++)
3209 {
3210 conflict_function *overlaps_a, *overlaps_b;
3211
3212 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3213 DR_ACCESS_FN (drb, i),
3214 &overlaps_a, &overlaps_b,
3215 &last_conflicts, loop_nest);
3216
3217 if (CF_NOT_KNOWN_P (overlaps_a)
3218 || CF_NOT_KNOWN_P (overlaps_b))
3219 {
3220 finalize_ddr_dependent (ddr, chrec_dont_know);
3221 dependence_stats.num_dependence_undetermined++;
3222 free_conflict_function (overlaps_a);
3223 free_conflict_function (overlaps_b);
3224 return false;
3225 }
3226
3227 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3228 || CF_NO_DEPENDENCE_P (overlaps_b))
3229 {
3230 finalize_ddr_dependent (ddr, chrec_known);
3231 dependence_stats.num_dependence_independent++;
3232 free_conflict_function (overlaps_a);
3233 free_conflict_function (overlaps_b);
3234 return false;
3235 }
3236
3237 else
3238 {
3239 if (SUB_CONFLICTS_IN_A (subscript))
3240 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3241 if (SUB_CONFLICTS_IN_B (subscript))
3242 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3243
3244 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3245 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3246 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3247 }
3248 }
3249
3250 return true;
3251 }
3252
3253 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3254
3255 static void
3256 subscript_dependence_tester (struct data_dependence_relation *ddr,
3257 struct loop *loop_nest)
3258 {
3259
3260 if (dump_file && (dump_flags & TDF_DETAILS))
3261 fprintf (dump_file, "(subscript_dependence_tester \n");
3262
3263 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3264 dependence_stats.num_dependence_dependent++;
3265
3266 compute_subscript_distance (ddr);
3267 if (build_classic_dist_vector (ddr, loop_nest))
3268 build_classic_dir_vector (ddr);
3269
3270 if (dump_file && (dump_flags & TDF_DETAILS))
3271 fprintf (dump_file, ")\n");
3272 }
3273
3274 /* Returns true when all the access functions of A are affine or
3275 constant with respect to LOOP_NEST. */
3276
3277 static bool
3278 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3279 const struct loop *loop_nest)
3280 {
3281 unsigned int i;
3282 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3283 tree t;
3284
3285 FOR_EACH_VEC_ELT (tree, fns, i, t)
3286 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3287 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3288 return false;
3289
3290 return true;
3291 }
3292
3293 /* Initializes an equation for an OMEGA problem using the information
3294 contained in the ACCESS_FUN. Returns true when the operation
3295 succeeded.
3296
3297 PB is the omega constraint system.
3298 EQ is the number of the equation to be initialized.
3299 OFFSET is used for shifting the variables names in the constraints:
3300 a constrain is composed of 2 * the number of variables surrounding
3301 dependence accesses. OFFSET is set either to 0 for the first n variables,
3302 then it is set to n.
3303 ACCESS_FUN is expected to be an affine chrec. */
3304
3305 static bool
3306 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3307 unsigned int offset, tree access_fun,
3308 struct data_dependence_relation *ddr)
3309 {
3310 switch (TREE_CODE (access_fun))
3311 {
3312 case POLYNOMIAL_CHREC:
3313 {
3314 tree left = CHREC_LEFT (access_fun);
3315 tree right = CHREC_RIGHT (access_fun);
3316 int var = CHREC_VARIABLE (access_fun);
3317 unsigned var_idx;
3318
3319 if (TREE_CODE (right) != INTEGER_CST)
3320 return false;
3321
3322 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3323 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3324
3325 /* Compute the innermost loop index. */
3326 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3327
3328 if (offset == 0)
3329 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3330 += int_cst_value (right);
3331
3332 switch (TREE_CODE (left))
3333 {
3334 case POLYNOMIAL_CHREC:
3335 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3336
3337 case INTEGER_CST:
3338 pb->eqs[eq].coef[0] += int_cst_value (left);
3339 return true;
3340
3341 default:
3342 return false;
3343 }
3344 }
3345
3346 case INTEGER_CST:
3347 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3348 return true;
3349
3350 default:
3351 return false;
3352 }
3353 }
3354
3355 /* As explained in the comments preceding init_omega_for_ddr, we have
3356 to set up a system for each loop level, setting outer loops
3357 variation to zero, and current loop variation to positive or zero.
3358 Save each lexico positive distance vector. */
3359
3360 static void
3361 omega_extract_distance_vectors (omega_pb pb,
3362 struct data_dependence_relation *ddr)
3363 {
3364 int eq, geq;
3365 unsigned i, j;
3366 struct loop *loopi, *loopj;
3367 enum omega_result res;
3368
3369 /* Set a new problem for each loop in the nest. The basis is the
3370 problem that we have initialized until now. On top of this we
3371 add new constraints. */
3372 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3373 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3374 {
3375 int dist = 0;
3376 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3377 DDR_NB_LOOPS (ddr));
3378
3379 omega_copy_problem (copy, pb);
3380
3381 /* For all the outer loops "loop_j", add "dj = 0". */
3382 for (j = 0;
3383 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3384 {
3385 eq = omega_add_zero_eq (copy, omega_black);
3386 copy->eqs[eq].coef[j + 1] = 1;
3387 }
3388
3389 /* For "loop_i", add "0 <= di". */
3390 geq = omega_add_zero_geq (copy, omega_black);
3391 copy->geqs[geq].coef[i + 1] = 1;
3392
3393 /* Reduce the constraint system, and test that the current
3394 problem is feasible. */
3395 res = omega_simplify_problem (copy);
3396 if (res == omega_false
3397 || res == omega_unknown
3398 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3399 goto next_problem;
3400
3401 for (eq = 0; eq < copy->num_subs; eq++)
3402 if (copy->subs[eq].key == (int) i + 1)
3403 {
3404 dist = copy->subs[eq].coef[0];
3405 goto found_dist;
3406 }
3407
3408 if (dist == 0)
3409 {
3410 /* Reinitialize problem... */
3411 omega_copy_problem (copy, pb);
3412 for (j = 0;
3413 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3414 {
3415 eq = omega_add_zero_eq (copy, omega_black);
3416 copy->eqs[eq].coef[j + 1] = 1;
3417 }
3418
3419 /* ..., but this time "di = 1". */
3420 eq = omega_add_zero_eq (copy, omega_black);
3421 copy->eqs[eq].coef[i + 1] = 1;
3422 copy->eqs[eq].coef[0] = -1;
3423
3424 res = omega_simplify_problem (copy);
3425 if (res == omega_false
3426 || res == omega_unknown
3427 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3428 goto next_problem;
3429
3430 for (eq = 0; eq < copy->num_subs; eq++)
3431 if (copy->subs[eq].key == (int) i + 1)
3432 {
3433 dist = copy->subs[eq].coef[0];
3434 goto found_dist;
3435 }
3436 }
3437
3438 found_dist:;
3439 /* Save the lexicographically positive distance vector. */
3440 if (dist >= 0)
3441 {
3442 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3443 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3444
3445 dist_v[i] = dist;
3446
3447 for (eq = 0; eq < copy->num_subs; eq++)
3448 if (copy->subs[eq].key > 0)
3449 {
3450 dist = copy->subs[eq].coef[0];
3451 dist_v[copy->subs[eq].key - 1] = dist;
3452 }
3453
3454 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3455 dir_v[j] = dir_from_dist (dist_v[j]);
3456
3457 save_dist_v (ddr, dist_v);
3458 save_dir_v (ddr, dir_v);
3459 }
3460
3461 next_problem:;
3462 omega_free_problem (copy);
3463 }
3464 }
3465
3466 /* This is called for each subscript of a tuple of data references:
3467 insert an equality for representing the conflicts. */
3468
3469 static bool
3470 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3471 struct data_dependence_relation *ddr,
3472 omega_pb pb, bool *maybe_dependent)
3473 {
3474 int eq;
3475 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3476 TREE_TYPE (access_fun_b));
3477 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3478 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3479 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3480 tree minus_one;
3481
3482 /* When the fun_a - fun_b is not constant, the dependence is not
3483 captured by the classic distance vector representation. */
3484 if (TREE_CODE (difference) != INTEGER_CST)
3485 return false;
3486
3487 /* ZIV test. */
3488 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3489 {
3490 /* There is no dependence. */
3491 *maybe_dependent = false;
3492 return true;
3493 }
3494
3495 minus_one = build_int_cst (type, -1);
3496 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3497
3498 eq = omega_add_zero_eq (pb, omega_black);
3499 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3500 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3501 /* There is probably a dependence, but the system of
3502 constraints cannot be built: answer "don't know". */
3503 return false;
3504
3505 /* GCD test. */
3506 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3507 && !int_divides_p (lambda_vector_gcd
3508 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3509 2 * DDR_NB_LOOPS (ddr)),
3510 pb->eqs[eq].coef[0]))
3511 {
3512 /* There is no dependence. */
3513 *maybe_dependent = false;
3514 return true;
3515 }
3516
3517 return true;
3518 }
3519
3520 /* Helper function, same as init_omega_for_ddr but specialized for
3521 data references A and B. */
3522
3523 static bool
3524 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3525 struct data_dependence_relation *ddr,
3526 omega_pb pb, bool *maybe_dependent)
3527 {
3528 unsigned i;
3529 int ineq;
3530 struct loop *loopi;
3531 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3532
3533 /* Insert an equality per subscript. */
3534 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3535 {
3536 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3537 ddr, pb, maybe_dependent))
3538 return false;
3539 else if (*maybe_dependent == false)
3540 {
3541 /* There is no dependence. */
3542 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3543 return true;
3544 }
3545 }
3546
3547 /* Insert inequalities: constraints corresponding to the iteration
3548 domain, i.e. the loops surrounding the references "loop_x" and
3549 the distance variables "dx". The layout of the OMEGA
3550 representation is as follows:
3551 - coef[0] is the constant
3552 - coef[1..nb_loops] are the protected variables that will not be
3553 removed by the solver: the "dx"
3554 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3555 */
3556 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3557 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3558 {
3559 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3560
3561 /* 0 <= loop_x */
3562 ineq = omega_add_zero_geq (pb, omega_black);
3563 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3564
3565 /* 0 <= loop_x + dx */
3566 ineq = omega_add_zero_geq (pb, omega_black);
3567 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3568 pb->geqs[ineq].coef[i + 1] = 1;
3569
3570 if (nbi != -1)
3571 {
3572 /* loop_x <= nb_iters */
3573 ineq = omega_add_zero_geq (pb, omega_black);
3574 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3575 pb->geqs[ineq].coef[0] = nbi;
3576
3577 /* loop_x + dx <= nb_iters */
3578 ineq = omega_add_zero_geq (pb, omega_black);
3579 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3580 pb->geqs[ineq].coef[i + 1] = -1;
3581 pb->geqs[ineq].coef[0] = nbi;
3582
3583 /* A step "dx" bigger than nb_iters is not feasible, so
3584 add "0 <= nb_iters + dx", */
3585 ineq = omega_add_zero_geq (pb, omega_black);
3586 pb->geqs[ineq].coef[i + 1] = 1;
3587 pb->geqs[ineq].coef[0] = nbi;
3588 /* and "dx <= nb_iters". */
3589 ineq = omega_add_zero_geq (pb, omega_black);
3590 pb->geqs[ineq].coef[i + 1] = -1;
3591 pb->geqs[ineq].coef[0] = nbi;
3592 }
3593 }
3594
3595 omega_extract_distance_vectors (pb, ddr);
3596
3597 return true;
3598 }
3599
3600 /* Sets up the Omega dependence problem for the data dependence
3601 relation DDR. Returns false when the constraint system cannot be
3602 built, ie. when the test answers "don't know". Returns true
3603 otherwise, and when independence has been proved (using one of the
3604 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3605 set MAYBE_DEPENDENT to true.
3606
3607 Example: for setting up the dependence system corresponding to the
3608 conflicting accesses
3609
3610 | loop_i
3611 | loop_j
3612 | A[i, i+1] = ...
3613 | ... A[2*j, 2*(i + j)]
3614 | endloop_j
3615 | endloop_i
3616
3617 the following constraints come from the iteration domain:
3618
3619 0 <= i <= Ni
3620 0 <= i + di <= Ni
3621 0 <= j <= Nj
3622 0 <= j + dj <= Nj
3623
3624 where di, dj are the distance variables. The constraints
3625 representing the conflicting elements are:
3626
3627 i = 2 * (j + dj)
3628 i + 1 = 2 * (i + di + j + dj)
3629
3630 For asking that the resulting distance vector (di, dj) be
3631 lexicographically positive, we insert the constraint "di >= 0". If
3632 "di = 0" in the solution, we fix that component to zero, and we
3633 look at the inner loops: we set a new problem where all the outer
3634 loop distances are zero, and fix this inner component to be
3635 positive. When one of the components is positive, we save that
3636 distance, and set a new problem where the distance on this loop is
3637 zero, searching for other distances in the inner loops. Here is
3638 the classic example that illustrates that we have to set for each
3639 inner loop a new problem:
3640
3641 | loop_1
3642 | loop_2
3643 | A[10]
3644 | endloop_2
3645 | endloop_1
3646
3647 we have to save two distances (1, 0) and (0, 1).
3648
3649 Given two array references, refA and refB, we have to set the
3650 dependence problem twice, refA vs. refB and refB vs. refA, and we
3651 cannot do a single test, as refB might occur before refA in the
3652 inner loops, and the contrary when considering outer loops: ex.
3653
3654 | loop_0
3655 | loop_1
3656 | loop_2
3657 | T[{1,+,1}_2][{1,+,1}_1] // refA
3658 | T[{2,+,1}_2][{0,+,1}_1] // refB
3659 | endloop_2
3660 | endloop_1
3661 | endloop_0
3662
3663 refB touches the elements in T before refA, and thus for the same
3664 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3665 but for successive loop_0 iterations, we have (1, -1, 1)
3666
3667 The Omega solver expects the distance variables ("di" in the
3668 previous example) to come first in the constraint system (as
3669 variables to be protected, or "safe" variables), the constraint
3670 system is built using the following layout:
3671
3672 "cst | distance vars | index vars".
3673 */
3674
3675 static bool
3676 init_omega_for_ddr (struct data_dependence_relation *ddr,
3677 bool *maybe_dependent)
3678 {
3679 omega_pb pb;
3680 bool res = false;
3681
3682 *maybe_dependent = true;
3683
3684 if (same_access_functions (ddr))
3685 {
3686 unsigned j;
3687 lambda_vector dir_v;
3688
3689 /* Save the 0 vector. */
3690 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3691 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3692 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3693 dir_v[j] = dir_equal;
3694 save_dir_v (ddr, dir_v);
3695
3696 /* Save the dependences carried by outer loops. */
3697 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3698 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3699 maybe_dependent);
3700 omega_free_problem (pb);
3701 return res;
3702 }
3703
3704 /* Omega expects the protected variables (those that have to be kept
3705 after elimination) to appear first in the constraint system.
3706 These variables are the distance variables. In the following
3707 initialization we declare NB_LOOPS safe variables, and the total
3708 number of variables for the constraint system is 2*NB_LOOPS. */
3709 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3710 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3711 maybe_dependent);
3712 omega_free_problem (pb);
3713
3714 /* Stop computation if not decidable, or no dependence. */
3715 if (res == false || *maybe_dependent == false)
3716 return res;
3717
3718 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3719 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3720 maybe_dependent);
3721 omega_free_problem (pb);
3722
3723 return res;
3724 }
3725
3726 /* Return true when DDR contains the same information as that stored
3727 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3728
3729 static bool
3730 ddr_consistent_p (FILE *file,
3731 struct data_dependence_relation *ddr,
3732 VEC (lambda_vector, heap) *dist_vects,
3733 VEC (lambda_vector, heap) *dir_vects)
3734 {
3735 unsigned int i, j;
3736
3737 /* If dump_file is set, output there. */
3738 if (dump_file && (dump_flags & TDF_DETAILS))
3739 file = dump_file;
3740
3741 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3742 {
3743 lambda_vector b_dist_v;
3744 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3745 VEC_length (lambda_vector, dist_vects),
3746 DDR_NUM_DIST_VECTS (ddr));
3747
3748 fprintf (file, "Banerjee dist vectors:\n");
3749 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3750 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3751
3752 fprintf (file, "Omega dist vectors:\n");
3753 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3754 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3755
3756 fprintf (file, "data dependence relation:\n");
3757 dump_data_dependence_relation (file, ddr);
3758
3759 fprintf (file, ")\n");
3760 return false;
3761 }
3762
3763 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3764 {
3765 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3766 VEC_length (lambda_vector, dir_vects),
3767 DDR_NUM_DIR_VECTS (ddr));
3768 return false;
3769 }
3770
3771 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3772 {
3773 lambda_vector a_dist_v;
3774 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3775
3776 /* Distance vectors are not ordered in the same way in the DDR
3777 and in the DIST_VECTS: search for a matching vector. */
3778 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3779 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3780 break;
3781
3782 if (j == VEC_length (lambda_vector, dist_vects))
3783 {
3784 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3785 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3786 fprintf (file, "not found in Omega dist vectors:\n");
3787 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3788 fprintf (file, "data dependence relation:\n");
3789 dump_data_dependence_relation (file, ddr);
3790 fprintf (file, ")\n");
3791 }
3792 }
3793
3794 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3795 {
3796 lambda_vector a_dir_v;
3797 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3798
3799 /* Direction vectors are not ordered in the same way in the DDR
3800 and in the DIR_VECTS: search for a matching vector. */
3801 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3802 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3803 break;
3804
3805 if (j == VEC_length (lambda_vector, dist_vects))
3806 {
3807 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3808 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3809 fprintf (file, "not found in Omega dir vectors:\n");
3810 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3811 fprintf (file, "data dependence relation:\n");
3812 dump_data_dependence_relation (file, ddr);
3813 fprintf (file, ")\n");
3814 }
3815 }
3816
3817 return true;
3818 }
3819
3820 /* This computes the affine dependence relation between A and B with
3821 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3822 independence between two accesses, while CHREC_DONT_KNOW is used
3823 for representing the unknown relation.
3824
3825 Note that it is possible to stop the computation of the dependence
3826 relation the first time we detect a CHREC_KNOWN element for a given
3827 subscript. */
3828
3829 static void
3830 compute_affine_dependence (struct data_dependence_relation *ddr,
3831 struct loop *loop_nest)
3832 {
3833 struct data_reference *dra = DDR_A (ddr);
3834 struct data_reference *drb = DDR_B (ddr);
3835
3836 if (dump_file && (dump_flags & TDF_DETAILS))
3837 {
3838 fprintf (dump_file, "(compute_affine_dependence\n");
3839 fprintf (dump_file, " (stmt_a = \n");
3840 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3841 fprintf (dump_file, ")\n (stmt_b = \n");
3842 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3843 fprintf (dump_file, ")\n");
3844 }
3845
3846 /* Analyze only when the dependence relation is not yet known. */
3847 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3848 && !DDR_SELF_REFERENCE (ddr))
3849 {
3850 dependence_stats.num_dependence_tests++;
3851
3852 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3853 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3854 {
3855 if (flag_check_data_deps)
3856 {
3857 /* Compute the dependences using the first algorithm. */
3858 subscript_dependence_tester (ddr, loop_nest);
3859
3860 if (dump_file && (dump_flags & TDF_DETAILS))
3861 {
3862 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3863 dump_data_dependence_relation (dump_file, ddr);
3864 }
3865
3866 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3867 {
3868 bool maybe_dependent;
3869 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3870
3871 /* Save the result of the first DD analyzer. */
3872 dist_vects = DDR_DIST_VECTS (ddr);
3873 dir_vects = DDR_DIR_VECTS (ddr);
3874
3875 /* Reset the information. */
3876 DDR_DIST_VECTS (ddr) = NULL;
3877 DDR_DIR_VECTS (ddr) = NULL;
3878
3879 /* Compute the same information using Omega. */
3880 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3881 goto csys_dont_know;
3882
3883 if (dump_file && (dump_flags & TDF_DETAILS))
3884 {
3885 fprintf (dump_file, "Omega Analyzer\n");
3886 dump_data_dependence_relation (dump_file, ddr);
3887 }
3888
3889 /* Check that we get the same information. */
3890 if (maybe_dependent)
3891 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3892 dir_vects));
3893 }
3894 }
3895 else
3896 subscript_dependence_tester (ddr, loop_nest);
3897 }
3898
3899 /* As a last case, if the dependence cannot be determined, or if
3900 the dependence is considered too difficult to determine, answer
3901 "don't know". */
3902 else
3903 {
3904 csys_dont_know:;
3905 dependence_stats.num_dependence_undetermined++;
3906
3907 if (dump_file && (dump_flags & TDF_DETAILS))
3908 {
3909 fprintf (dump_file, "Data ref a:\n");
3910 dump_data_reference (dump_file, dra);
3911 fprintf (dump_file, "Data ref b:\n");
3912 dump_data_reference (dump_file, drb);
3913 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3914 }
3915 finalize_ddr_dependent (ddr, chrec_dont_know);
3916 }
3917 }
3918
3919 if (dump_file && (dump_flags & TDF_DETAILS))
3920 fprintf (dump_file, ")\n");
3921 }
3922
3923 /* This computes the dependence relation for the same data
3924 reference into DDR. */
3925
3926 static void
3927 compute_self_dependence (struct data_dependence_relation *ddr)
3928 {
3929 unsigned int i;
3930 struct subscript *subscript;
3931
3932 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3933 return;
3934
3935 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3936 i++)
3937 {
3938 if (SUB_CONFLICTS_IN_A (subscript))
3939 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3940 if (SUB_CONFLICTS_IN_B (subscript))
3941 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3942
3943 /* The accessed index overlaps for each iteration. */
3944 SUB_CONFLICTS_IN_A (subscript)
3945 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3946 SUB_CONFLICTS_IN_B (subscript)
3947 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3948 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3949 }
3950
3951 /* The distance vector is the zero vector. */
3952 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3953 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3954 }
3955
3956 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3957 the data references in DATAREFS, in the LOOP_NEST. When
3958 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3959 relations. */
3960
3961 void
3962 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3963 VEC (ddr_p, heap) **dependence_relations,
3964 VEC (loop_p, heap) *loop_nest,
3965 bool compute_self_and_rr)
3966 {
3967 struct data_dependence_relation *ddr;
3968 struct data_reference *a, *b;
3969 unsigned int i, j;
3970
3971 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
3972 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3973 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
3974 {
3975 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3976 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3977 if (loop_nest)
3978 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
3979 }
3980
3981 if (compute_self_and_rr)
3982 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
3983 {
3984 ddr = initialize_data_dependence_relation (a, a, loop_nest);
3985 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3986 compute_self_dependence (ddr);
3987 }
3988 }
3989
3990 /* Stores the locations of memory references in STMT to REFERENCES. Returns
3991 true if STMT clobbers memory, false otherwise. */
3992
3993 bool
3994 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
3995 {
3996 bool clobbers_memory = false;
3997 data_ref_loc *ref;
3998 tree *op0, *op1;
3999 enum gimple_code stmt_code = gimple_code (stmt);
4000
4001 *references = NULL;
4002
4003 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4004 Calls have side-effects, except those to const or pure
4005 functions. */
4006 if ((stmt_code == GIMPLE_CALL
4007 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4008 || (stmt_code == GIMPLE_ASM
4009 && gimple_asm_volatile_p (stmt)))
4010 clobbers_memory = true;
4011
4012 if (!gimple_vuse (stmt))
4013 return clobbers_memory;
4014
4015 if (stmt_code == GIMPLE_ASSIGN)
4016 {
4017 tree base;
4018 op0 = gimple_assign_lhs_ptr (stmt);
4019 op1 = gimple_assign_rhs1_ptr (stmt);
4020
4021 if (DECL_P (*op1)
4022 || (REFERENCE_CLASS_P (*op1)
4023 && (base = get_base_address (*op1))
4024 && TREE_CODE (base) != SSA_NAME))
4025 {
4026 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4027 ref->pos = op1;
4028 ref->is_read = true;
4029 }
4030
4031 if (DECL_P (*op0)
4032 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4033 {
4034 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4035 ref->pos = op0;
4036 ref->is_read = false;
4037 }
4038 }
4039 else if (stmt_code == GIMPLE_CALL)
4040 {
4041 unsigned i, n = gimple_call_num_args (stmt);
4042
4043 for (i = 0; i < n; i++)
4044 {
4045 op0 = gimple_call_arg_ptr (stmt, i);
4046
4047 if (DECL_P (*op0)
4048 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4049 {
4050 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4051 ref->pos = op0;
4052 ref->is_read = true;
4053 }
4054 }
4055 }
4056
4057 return clobbers_memory;
4058 }
4059
4060 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4061 reference, returns false, otherwise returns true. NEST is the outermost
4062 loop of the loop nest in which the references should be analyzed. */
4063
4064 bool
4065 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4066 VEC (data_reference_p, heap) **datarefs)
4067 {
4068 unsigned i;
4069 VEC (data_ref_loc, heap) *references;
4070 data_ref_loc *ref;
4071 bool ret = true;
4072 data_reference_p dr;
4073
4074 if (get_references_in_stmt (stmt, &references))
4075 {
4076 VEC_free (data_ref_loc, heap, references);
4077 return false;
4078 }
4079
4080 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4081 {
4082 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4083 *ref->pos, stmt, ref->is_read);
4084 gcc_assert (dr != NULL);
4085
4086 /* FIXME -- data dependence analysis does not work correctly for objects
4087 with invariant addresses in loop nests. Let us fail here until the
4088 problem is fixed. */
4089 if (dr_address_invariant_p (dr) && nest)
4090 {
4091 free_data_ref (dr);
4092 if (dump_file && (dump_flags & TDF_DETAILS))
4093 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4094 ret = false;
4095 break;
4096 }
4097
4098 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4099 }
4100 VEC_free (data_ref_loc, heap, references);
4101 return ret;
4102 }
4103
4104 /* Stores the data references in STMT to DATAREFS. If there is an
4105 unanalyzable reference, returns false, otherwise returns true.
4106 NEST is the outermost loop of the loop nest in which the references
4107 should be instantiated, LOOP is the loop in which the references
4108 should be analyzed. */
4109
4110 bool
4111 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4112 VEC (data_reference_p, heap) **datarefs)
4113 {
4114 unsigned i;
4115 VEC (data_ref_loc, heap) *references;
4116 data_ref_loc *ref;
4117 bool ret = true;
4118 data_reference_p dr;
4119
4120 if (get_references_in_stmt (stmt, &references))
4121 {
4122 VEC_free (data_ref_loc, heap, references);
4123 return false;
4124 }
4125
4126 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4127 {
4128 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4129 gcc_assert (dr != NULL);
4130 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4131 }
4132
4133 VEC_free (data_ref_loc, heap, references);
4134 return ret;
4135 }
4136
4137 /* Search the data references in LOOP, and record the information into
4138 DATAREFS. Returns chrec_dont_know when failing to analyze a
4139 difficult case, returns NULL_TREE otherwise. */
4140
4141 static tree
4142 find_data_references_in_bb (struct loop *loop, basic_block bb,
4143 VEC (data_reference_p, heap) **datarefs)
4144 {
4145 gimple_stmt_iterator bsi;
4146
4147 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4148 {
4149 gimple stmt = gsi_stmt (bsi);
4150
4151 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4152 {
4153 struct data_reference *res;
4154 res = XCNEW (struct data_reference);
4155 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4156
4157 return chrec_dont_know;
4158 }
4159 }
4160
4161 return NULL_TREE;
4162 }
4163
4164 /* Search the data references in LOOP, and record the information into
4165 DATAREFS. Returns chrec_dont_know when failing to analyze a
4166 difficult case, returns NULL_TREE otherwise.
4167
4168 TODO: This function should be made smarter so that it can handle address
4169 arithmetic as if they were array accesses, etc. */
4170
4171 tree
4172 find_data_references_in_loop (struct loop *loop,
4173 VEC (data_reference_p, heap) **datarefs)
4174 {
4175 basic_block bb, *bbs;
4176 unsigned int i;
4177
4178 bbs = get_loop_body_in_dom_order (loop);
4179
4180 for (i = 0; i < loop->num_nodes; i++)
4181 {
4182 bb = bbs[i];
4183
4184 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4185 {
4186 free (bbs);
4187 return chrec_dont_know;
4188 }
4189 }
4190 free (bbs);
4191
4192 return NULL_TREE;
4193 }
4194
4195 /* Recursive helper function. */
4196
4197 static bool
4198 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4199 {
4200 /* Inner loops of the nest should not contain siblings. Example:
4201 when there are two consecutive loops,
4202
4203 | loop_0
4204 | loop_1
4205 | A[{0, +, 1}_1]
4206 | endloop_1
4207 | loop_2
4208 | A[{0, +, 1}_2]
4209 | endloop_2
4210 | endloop_0
4211
4212 the dependence relation cannot be captured by the distance
4213 abstraction. */
4214 if (loop->next)
4215 return false;
4216
4217 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4218 if (loop->inner)
4219 return find_loop_nest_1 (loop->inner, loop_nest);
4220 return true;
4221 }
4222
4223 /* Return false when the LOOP is not well nested. Otherwise return
4224 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4225 contain the loops from the outermost to the innermost, as they will
4226 appear in the classic distance vector. */
4227
4228 bool
4229 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4230 {
4231 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4232 if (loop->inner)
4233 return find_loop_nest_1 (loop->inner, loop_nest);
4234 return true;
4235 }
4236
4237 /* Returns true when the data dependences have been computed, false otherwise.
4238 Given a loop nest LOOP, the following vectors are returned:
4239 DATAREFS is initialized to all the array elements contained in this loop,
4240 DEPENDENCE_RELATIONS contains the relations between the data references.
4241 Compute read-read and self relations if
4242 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4243
4244 bool
4245 compute_data_dependences_for_loop (struct loop *loop,
4246 bool compute_self_and_read_read_dependences,
4247 VEC (loop_p, heap) **loop_nest,
4248 VEC (data_reference_p, heap) **datarefs,
4249 VEC (ddr_p, heap) **dependence_relations)
4250 {
4251 bool res = true;
4252
4253 memset (&dependence_stats, 0, sizeof (dependence_stats));
4254
4255 /* If the loop nest is not well formed, or one of the data references
4256 is not computable, give up without spending time to compute other
4257 dependences. */
4258 if (!loop
4259 || !find_loop_nest (loop, loop_nest)
4260 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4261 {
4262 struct data_dependence_relation *ddr;
4263
4264 /* Insert a single relation into dependence_relations:
4265 chrec_dont_know. */
4266 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4267 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4268 res = false;
4269 }
4270 else
4271 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4272 compute_self_and_read_read_dependences);
4273
4274 if (dump_file && (dump_flags & TDF_STATS))
4275 {
4276 fprintf (dump_file, "Dependence tester statistics:\n");
4277
4278 fprintf (dump_file, "Number of dependence tests: %d\n",
4279 dependence_stats.num_dependence_tests);
4280 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4281 dependence_stats.num_dependence_dependent);
4282 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4283 dependence_stats.num_dependence_independent);
4284 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4285 dependence_stats.num_dependence_undetermined);
4286
4287 fprintf (dump_file, "Number of subscript tests: %d\n",
4288 dependence_stats.num_subscript_tests);
4289 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4290 dependence_stats.num_subscript_undetermined);
4291 fprintf (dump_file, "Number of same subscript function: %d\n",
4292 dependence_stats.num_same_subscript_function);
4293
4294 fprintf (dump_file, "Number of ziv tests: %d\n",
4295 dependence_stats.num_ziv);
4296 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4297 dependence_stats.num_ziv_dependent);
4298 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4299 dependence_stats.num_ziv_independent);
4300 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4301 dependence_stats.num_ziv_unimplemented);
4302
4303 fprintf (dump_file, "Number of siv tests: %d\n",
4304 dependence_stats.num_siv);
4305 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4306 dependence_stats.num_siv_dependent);
4307 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4308 dependence_stats.num_siv_independent);
4309 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4310 dependence_stats.num_siv_unimplemented);
4311
4312 fprintf (dump_file, "Number of miv tests: %d\n",
4313 dependence_stats.num_miv);
4314 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4315 dependence_stats.num_miv_dependent);
4316 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4317 dependence_stats.num_miv_independent);
4318 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4319 dependence_stats.num_miv_unimplemented);
4320 }
4321
4322 return res;
4323 }
4324
4325 /* Returns true when the data dependences for the basic block BB have been
4326 computed, false otherwise.
4327 DATAREFS is initialized to all the array elements contained in this basic
4328 block, DEPENDENCE_RELATIONS contains the relations between the data
4329 references. Compute read-read and self relations if
4330 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4331 bool
4332 compute_data_dependences_for_bb (basic_block bb,
4333 bool compute_self_and_read_read_dependences,
4334 VEC (data_reference_p, heap) **datarefs,
4335 VEC (ddr_p, heap) **dependence_relations)
4336 {
4337 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4338 return false;
4339
4340 compute_all_dependences (*datarefs, dependence_relations, NULL,
4341 compute_self_and_read_read_dependences);
4342 return true;
4343 }
4344
4345 /* Entry point (for testing only). Analyze all the data references
4346 and the dependence relations in LOOP.
4347
4348 The data references are computed first.
4349
4350 A relation on these nodes is represented by a complete graph. Some
4351 of the relations could be of no interest, thus the relations can be
4352 computed on demand.
4353
4354 In the following function we compute all the relations. This is
4355 just a first implementation that is here for:
4356 - for showing how to ask for the dependence relations,
4357 - for the debugging the whole dependence graph,
4358 - for the dejagnu testcases and maintenance.
4359
4360 It is possible to ask only for a part of the graph, avoiding to
4361 compute the whole dependence graph. The computed dependences are
4362 stored in a knowledge base (KB) such that later queries don't
4363 recompute the same information. The implementation of this KB is
4364 transparent to the optimizer, and thus the KB can be changed with a
4365 more efficient implementation, or the KB could be disabled. */
4366 static void
4367 analyze_all_data_dependences (struct loop *loop)
4368 {
4369 unsigned int i;
4370 int nb_data_refs = 10;
4371 VEC (data_reference_p, heap) *datarefs =
4372 VEC_alloc (data_reference_p, heap, nb_data_refs);
4373 VEC (ddr_p, heap) *dependence_relations =
4374 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4375 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4376
4377 /* Compute DDs on the whole function. */
4378 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4379 &dependence_relations);
4380
4381 if (dump_file)
4382 {
4383 dump_data_dependence_relations (dump_file, dependence_relations);
4384 fprintf (dump_file, "\n\n");
4385
4386 if (dump_flags & TDF_DETAILS)
4387 dump_dist_dir_vectors (dump_file, dependence_relations);
4388
4389 if (dump_flags & TDF_STATS)
4390 {
4391 unsigned nb_top_relations = 0;
4392 unsigned nb_bot_relations = 0;
4393 unsigned nb_chrec_relations = 0;
4394 struct data_dependence_relation *ddr;
4395
4396 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4397 {
4398 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4399 nb_top_relations++;
4400
4401 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4402 nb_bot_relations++;
4403
4404 else
4405 nb_chrec_relations++;
4406 }
4407
4408 gather_stats_on_scev_database ();
4409 }
4410 }
4411
4412 VEC_free (loop_p, heap, loop_nest);
4413 free_dependence_relations (dependence_relations);
4414 free_data_refs (datarefs);
4415 }
4416
4417 /* Computes all the data dependences and check that the results of
4418 several analyzers are the same. */
4419
4420 void
4421 tree_check_data_deps (void)
4422 {
4423 loop_iterator li;
4424 struct loop *loop_nest;
4425
4426 FOR_EACH_LOOP (li, loop_nest, 0)
4427 analyze_all_data_dependences (loop_nest);
4428 }
4429
4430 /* Free the memory used by a data dependence relation DDR. */
4431
4432 void
4433 free_dependence_relation (struct data_dependence_relation *ddr)
4434 {
4435 if (ddr == NULL)
4436 return;
4437
4438 if (DDR_SUBSCRIPTS (ddr))
4439 free_subscripts (DDR_SUBSCRIPTS (ddr));
4440 if (DDR_DIST_VECTS (ddr))
4441 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4442 if (DDR_DIR_VECTS (ddr))
4443 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4444
4445 free (ddr);
4446 }
4447
4448 /* Free the memory used by the data dependence relations from
4449 DEPENDENCE_RELATIONS. */
4450
4451 void
4452 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4453 {
4454 unsigned int i;
4455 struct data_dependence_relation *ddr;
4456
4457 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4458 if (ddr)
4459 free_dependence_relation (ddr);
4460
4461 VEC_free (ddr_p, heap, dependence_relations);
4462 }
4463
4464 /* Free the memory used by the data references from DATAREFS. */
4465
4466 void
4467 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4468 {
4469 unsigned int i;
4470 struct data_reference *dr;
4471
4472 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4473 free_data_ref (dr);
4474 VEC_free (data_reference_p, heap, datarefs);
4475 }
4476
4477 \f
4478
4479 /* Dump vertex I in RDG to FILE. */
4480
4481 void
4482 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4483 {
4484 struct vertex *v = &(rdg->vertices[i]);
4485 struct graph_edge *e;
4486
4487 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4488 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4489 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4490
4491 if (v->pred)
4492 for (e = v->pred; e; e = e->pred_next)
4493 fprintf (file, " %d", e->src);
4494
4495 fprintf (file, ") (out:");
4496
4497 if (v->succ)
4498 for (e = v->succ; e; e = e->succ_next)
4499 fprintf (file, " %d", e->dest);
4500
4501 fprintf (file, ")\n");
4502 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4503 fprintf (file, ")\n");
4504 }
4505
4506 /* Call dump_rdg_vertex on stderr. */
4507
4508 DEBUG_FUNCTION void
4509 debug_rdg_vertex (struct graph *rdg, int i)
4510 {
4511 dump_rdg_vertex (stderr, rdg, i);
4512 }
4513
4514 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4515 dumped vertices to that bitmap. */
4516
4517 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4518 {
4519 int i;
4520
4521 fprintf (file, "(%d\n", c);
4522
4523 for (i = 0; i < rdg->n_vertices; i++)
4524 if (rdg->vertices[i].component == c)
4525 {
4526 if (dumped)
4527 bitmap_set_bit (dumped, i);
4528
4529 dump_rdg_vertex (file, rdg, i);
4530 }
4531
4532 fprintf (file, ")\n");
4533 }
4534
4535 /* Call dump_rdg_vertex on stderr. */
4536
4537 DEBUG_FUNCTION void
4538 debug_rdg_component (struct graph *rdg, int c)
4539 {
4540 dump_rdg_component (stderr, rdg, c, NULL);
4541 }
4542
4543 /* Dump the reduced dependence graph RDG to FILE. */
4544
4545 void
4546 dump_rdg (FILE *file, struct graph *rdg)
4547 {
4548 int i;
4549 bitmap dumped = BITMAP_ALLOC (NULL);
4550
4551 fprintf (file, "(rdg\n");
4552
4553 for (i = 0; i < rdg->n_vertices; i++)
4554 if (!bitmap_bit_p (dumped, i))
4555 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4556
4557 fprintf (file, ")\n");
4558 BITMAP_FREE (dumped);
4559 }
4560
4561 /* Call dump_rdg on stderr. */
4562
4563 DEBUG_FUNCTION void
4564 debug_rdg (struct graph *rdg)
4565 {
4566 dump_rdg (stderr, rdg);
4567 }
4568
4569 static void
4570 dot_rdg_1 (FILE *file, struct graph *rdg)
4571 {
4572 int i;
4573
4574 fprintf (file, "digraph RDG {\n");
4575
4576 for (i = 0; i < rdg->n_vertices; i++)
4577 {
4578 struct vertex *v = &(rdg->vertices[i]);
4579 struct graph_edge *e;
4580
4581 /* Highlight reads from memory. */
4582 if (RDG_MEM_READS_STMT (rdg, i))
4583 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4584
4585 /* Highlight stores to memory. */
4586 if (RDG_MEM_WRITE_STMT (rdg, i))
4587 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4588
4589 if (v->succ)
4590 for (e = v->succ; e; e = e->succ_next)
4591 switch (RDGE_TYPE (e))
4592 {
4593 case input_dd:
4594 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4595 break;
4596
4597 case output_dd:
4598 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4599 break;
4600
4601 case flow_dd:
4602 /* These are the most common dependences: don't print these. */
4603 fprintf (file, "%d -> %d \n", i, e->dest);
4604 break;
4605
4606 case anti_dd:
4607 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4608 break;
4609
4610 default:
4611 gcc_unreachable ();
4612 }
4613 }
4614
4615 fprintf (file, "}\n\n");
4616 }
4617
4618 /* Display the Reduced Dependence Graph using dotty. */
4619 extern void dot_rdg (struct graph *);
4620
4621 DEBUG_FUNCTION void
4622 dot_rdg (struct graph *rdg)
4623 {
4624 /* When debugging, enable the following code. This cannot be used
4625 in production compilers because it calls "system". */
4626 #if 0
4627 FILE *file = fopen ("/tmp/rdg.dot", "w");
4628 gcc_assert (file != NULL);
4629
4630 dot_rdg_1 (file, rdg);
4631 fclose (file);
4632
4633 system ("dotty /tmp/rdg.dot &");
4634 #else
4635 dot_rdg_1 (stderr, rdg);
4636 #endif
4637 }
4638
4639 /* This structure is used for recording the mapping statement index in
4640 the RDG. */
4641
4642 struct GTY(()) rdg_vertex_info
4643 {
4644 gimple stmt;
4645 int index;
4646 };
4647
4648 /* Returns the index of STMT in RDG. */
4649
4650 int
4651 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4652 {
4653 struct rdg_vertex_info rvi, *slot;
4654
4655 rvi.stmt = stmt;
4656 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4657
4658 if (!slot)
4659 return -1;
4660
4661 return slot->index;
4662 }
4663
4664 /* Creates an edge in RDG for each distance vector from DDR. The
4665 order that we keep track of in the RDG is the order in which
4666 statements have to be executed. */
4667
4668 static void
4669 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4670 {
4671 struct graph_edge *e;
4672 int va, vb;
4673 data_reference_p dra = DDR_A (ddr);
4674 data_reference_p drb = DDR_B (ddr);
4675 unsigned level = ddr_dependence_level (ddr);
4676
4677 /* For non scalar dependences, when the dependence is REVERSED,
4678 statement B has to be executed before statement A. */
4679 if (level > 0
4680 && !DDR_REVERSED_P (ddr))
4681 {
4682 data_reference_p tmp = dra;
4683 dra = drb;
4684 drb = tmp;
4685 }
4686
4687 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4688 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4689
4690 if (va < 0 || vb < 0)
4691 return;
4692
4693 e = add_edge (rdg, va, vb);
4694 e->data = XNEW (struct rdg_edge);
4695
4696 RDGE_LEVEL (e) = level;
4697 RDGE_RELATION (e) = ddr;
4698
4699 /* Determines the type of the data dependence. */
4700 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4701 RDGE_TYPE (e) = input_dd;
4702 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4703 RDGE_TYPE (e) = output_dd;
4704 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4705 RDGE_TYPE (e) = flow_dd;
4706 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4707 RDGE_TYPE (e) = anti_dd;
4708 }
4709
4710 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4711 the index of DEF in RDG. */
4712
4713 static void
4714 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4715 {
4716 use_operand_p imm_use_p;
4717 imm_use_iterator iterator;
4718
4719 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4720 {
4721 struct graph_edge *e;
4722 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4723
4724 if (use < 0)
4725 continue;
4726
4727 e = add_edge (rdg, idef, use);
4728 e->data = XNEW (struct rdg_edge);
4729 RDGE_TYPE (e) = flow_dd;
4730 RDGE_RELATION (e) = NULL;
4731 }
4732 }
4733
4734 /* Creates the edges of the reduced dependence graph RDG. */
4735
4736 static void
4737 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4738 {
4739 int i;
4740 struct data_dependence_relation *ddr;
4741 def_operand_p def_p;
4742 ssa_op_iter iter;
4743
4744 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4745 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4746 create_rdg_edge_for_ddr (rdg, ddr);
4747
4748 for (i = 0; i < rdg->n_vertices; i++)
4749 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4750 iter, SSA_OP_DEF)
4751 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4752 }
4753
4754 /* Build the vertices of the reduced dependence graph RDG. */
4755
4756 void
4757 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4758 {
4759 int i, j;
4760 gimple stmt;
4761
4762 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4763 {
4764 VEC (data_ref_loc, heap) *references;
4765 data_ref_loc *ref;
4766 struct vertex *v = &(rdg->vertices[i]);
4767 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4768 struct rdg_vertex_info **slot;
4769
4770 rvi->stmt = stmt;
4771 rvi->index = i;
4772 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4773
4774 if (!*slot)
4775 *slot = rvi;
4776 else
4777 free (rvi);
4778
4779 v->data = XNEW (struct rdg_vertex);
4780 RDG_STMT (rdg, i) = stmt;
4781
4782 RDG_MEM_WRITE_STMT (rdg, i) = false;
4783 RDG_MEM_READS_STMT (rdg, i) = false;
4784 if (gimple_code (stmt) == GIMPLE_PHI)
4785 continue;
4786
4787 get_references_in_stmt (stmt, &references);
4788 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4789 if (!ref->is_read)
4790 RDG_MEM_WRITE_STMT (rdg, i) = true;
4791 else
4792 RDG_MEM_READS_STMT (rdg, i) = true;
4793
4794 VEC_free (data_ref_loc, heap, references);
4795 }
4796 }
4797
4798 /* Initialize STMTS with all the statements of LOOP. When
4799 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4800 which we discover statements is important as
4801 generate_loops_for_partition is using the same traversal for
4802 identifying statements. */
4803
4804 static void
4805 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4806 {
4807 unsigned int i;
4808 basic_block *bbs = get_loop_body_in_dom_order (loop);
4809
4810 for (i = 0; i < loop->num_nodes; i++)
4811 {
4812 basic_block bb = bbs[i];
4813 gimple_stmt_iterator bsi;
4814 gimple stmt;
4815
4816 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4817 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4818
4819 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4820 {
4821 stmt = gsi_stmt (bsi);
4822 if (gimple_code (stmt) != GIMPLE_LABEL)
4823 VEC_safe_push (gimple, heap, *stmts, stmt);
4824 }
4825 }
4826
4827 free (bbs);
4828 }
4829
4830 /* Returns true when all the dependences are computable. */
4831
4832 static bool
4833 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4834 {
4835 ddr_p ddr;
4836 unsigned int i;
4837
4838 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4839 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4840 return false;
4841
4842 return true;
4843 }
4844
4845 /* Computes a hash function for element ELT. */
4846
4847 static hashval_t
4848 hash_stmt_vertex_info (const void *elt)
4849 {
4850 const struct rdg_vertex_info *const rvi =
4851 (const struct rdg_vertex_info *) elt;
4852 gimple stmt = rvi->stmt;
4853
4854 return htab_hash_pointer (stmt);
4855 }
4856
4857 /* Compares database elements E1 and E2. */
4858
4859 static int
4860 eq_stmt_vertex_info (const void *e1, const void *e2)
4861 {
4862 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4863 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4864
4865 return elt1->stmt == elt2->stmt;
4866 }
4867
4868 /* Free the element E. */
4869
4870 static void
4871 hash_stmt_vertex_del (void *e)
4872 {
4873 free (e);
4874 }
4875
4876 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4877 statement of the loop nest, and one edge per data dependence or
4878 scalar dependence. */
4879
4880 struct graph *
4881 build_empty_rdg (int n_stmts)
4882 {
4883 int nb_data_refs = 10;
4884 struct graph *rdg = new_graph (n_stmts);
4885
4886 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4887 eq_stmt_vertex_info, hash_stmt_vertex_del);
4888 return rdg;
4889 }
4890
4891 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4892 statement of the loop nest, and one edge per data dependence or
4893 scalar dependence. */
4894
4895 struct graph *
4896 build_rdg (struct loop *loop,
4897 VEC (loop_p, heap) **loop_nest,
4898 VEC (ddr_p, heap) **dependence_relations,
4899 VEC (data_reference_p, heap) **datarefs)
4900 {
4901 struct graph *rdg = NULL;
4902 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
4903
4904 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
4905 dependence_relations);
4906
4907 if (known_dependences_p (*dependence_relations))
4908 {
4909 stmts_from_loop (loop, &stmts);
4910 rdg = build_empty_rdg (VEC_length (gimple, stmts));
4911 create_rdg_vertices (rdg, stmts);
4912 create_rdg_edges (rdg, *dependence_relations);
4913 }
4914
4915 VEC_free (gimple, heap, stmts);
4916 return rdg;
4917 }
4918
4919 /* Free the reduced dependence graph RDG. */
4920
4921 void
4922 free_rdg (struct graph *rdg)
4923 {
4924 int i;
4925
4926 for (i = 0; i < rdg->n_vertices; i++)
4927 {
4928 struct vertex *v = &(rdg->vertices[i]);
4929 struct graph_edge *e;
4930
4931 for (e = v->succ; e; e = e->succ_next)
4932 if (e->data)
4933 free (e->data);
4934
4935 if (v->data)
4936 free (v->data);
4937 }
4938
4939 htab_delete (rdg->indices);
4940 free_graph (rdg);
4941 }
4942
4943 /* Initialize STMTS with all the statements of LOOP that contain a
4944 store to memory. */
4945
4946 void
4947 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4948 {
4949 unsigned int i;
4950 basic_block *bbs = get_loop_body_in_dom_order (loop);
4951
4952 for (i = 0; i < loop->num_nodes; i++)
4953 {
4954 basic_block bb = bbs[i];
4955 gimple_stmt_iterator bsi;
4956
4957 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4958 if (gimple_vdef (gsi_stmt (bsi)))
4959 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4960 }
4961
4962 free (bbs);
4963 }
4964
4965 /* Returns true when the statement at STMT is of the form "A[i] = 0"
4966 that contains a data reference on its LHS with a stride of the same
4967 size as its unit type. */
4968
4969 bool
4970 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
4971 {
4972 tree op0, op1;
4973 bool res;
4974 struct data_reference *dr;
4975
4976 if (!stmt
4977 || !gimple_vdef (stmt)
4978 || !is_gimple_assign (stmt)
4979 || !gimple_assign_single_p (stmt)
4980 || !(op1 = gimple_assign_rhs1 (stmt))
4981 || !(integer_zerop (op1) || real_zerop (op1)))
4982 return false;
4983
4984 dr = XCNEW (struct data_reference);
4985 op0 = gimple_assign_lhs (stmt);
4986
4987 DR_STMT (dr) = stmt;
4988 DR_REF (dr) = op0;
4989
4990 res = dr_analyze_innermost (dr)
4991 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
4992
4993 free_data_ref (dr);
4994 return res;
4995 }
4996
4997 /* Initialize STMTS with all the statements of LOOP that contain a
4998 store to memory of the form "A[i] = 0". */
4999
5000 void
5001 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5002 {
5003 unsigned int i;
5004 basic_block bb;
5005 gimple_stmt_iterator si;
5006 gimple stmt;
5007 basic_block *bbs = get_loop_body_in_dom_order (loop);
5008
5009 for (i = 0; i < loop->num_nodes; i++)
5010 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5011 if ((stmt = gsi_stmt (si))
5012 && stmt_with_adjacent_zero_store_dr_p (stmt))
5013 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5014
5015 free (bbs);
5016 }
5017
5018 /* For a data reference REF, return the declaration of its base
5019 address or NULL_TREE if the base is not determined. */
5020
5021 static inline tree
5022 ref_base_address (gimple stmt, data_ref_loc *ref)
5023 {
5024 tree base = NULL_TREE;
5025 tree base_address;
5026 struct data_reference *dr = XCNEW (struct data_reference);
5027
5028 DR_STMT (dr) = stmt;
5029 DR_REF (dr) = *ref->pos;
5030 dr_analyze_innermost (dr);
5031 base_address = DR_BASE_ADDRESS (dr);
5032
5033 if (!base_address)
5034 goto end;
5035
5036 switch (TREE_CODE (base_address))
5037 {
5038 case ADDR_EXPR:
5039 base = TREE_OPERAND (base_address, 0);
5040 break;
5041
5042 default:
5043 base = base_address;
5044 break;
5045 }
5046
5047 end:
5048 free_data_ref (dr);
5049 return base;
5050 }
5051
5052 /* Determines whether the statement from vertex V of the RDG has a
5053 definition used outside the loop that contains this statement. */
5054
5055 bool
5056 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5057 {
5058 gimple stmt = RDG_STMT (rdg, v);
5059 struct loop *loop = loop_containing_stmt (stmt);
5060 use_operand_p imm_use_p;
5061 imm_use_iterator iterator;
5062 ssa_op_iter it;
5063 def_operand_p def_p;
5064
5065 if (!loop)
5066 return true;
5067
5068 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5069 {
5070 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5071 {
5072 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5073 return true;
5074 }
5075 }
5076
5077 return false;
5078 }
5079
5080 /* Determines whether statements S1 and S2 access to similar memory
5081 locations. Two memory accesses are considered similar when they
5082 have the same base address declaration, i.e. when their
5083 ref_base_address is the same. */
5084
5085 bool
5086 have_similar_memory_accesses (gimple s1, gimple s2)
5087 {
5088 bool res = false;
5089 unsigned i, j;
5090 VEC (data_ref_loc, heap) *refs1, *refs2;
5091 data_ref_loc *ref1, *ref2;
5092
5093 get_references_in_stmt (s1, &refs1);
5094 get_references_in_stmt (s2, &refs2);
5095
5096 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5097 {
5098 tree base1 = ref_base_address (s1, ref1);
5099
5100 if (base1)
5101 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5102 if (base1 == ref_base_address (s2, ref2))
5103 {
5104 res = true;
5105 goto end;
5106 }
5107 }
5108
5109 end:
5110 VEC_free (data_ref_loc, heap, refs1);
5111 VEC_free (data_ref_loc, heap, refs2);
5112 return res;
5113 }
5114
5115 /* Helper function for the hashtab. */
5116
5117 static int
5118 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5119 {
5120 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5121 CONST_CAST_GIMPLE ((const_gimple) s2));
5122 }
5123
5124 /* Helper function for the hashtab. */
5125
5126 static hashval_t
5127 ref_base_address_1 (const void *s)
5128 {
5129 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5130 unsigned i;
5131 VEC (data_ref_loc, heap) *refs;
5132 data_ref_loc *ref;
5133 hashval_t res = 0;
5134
5135 get_references_in_stmt (stmt, &refs);
5136
5137 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5138 if (!ref->is_read)
5139 {
5140 res = htab_hash_pointer (ref_base_address (stmt, ref));
5141 break;
5142 }
5143
5144 VEC_free (data_ref_loc, heap, refs);
5145 return res;
5146 }
5147
5148 /* Try to remove duplicated write data references from STMTS. */
5149
5150 void
5151 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5152 {
5153 unsigned i;
5154 gimple stmt;
5155 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5156 have_similar_memory_accesses_1, NULL);
5157
5158 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5159 {
5160 void **slot;
5161
5162 slot = htab_find_slot (seen, stmt, INSERT);
5163
5164 if (*slot)
5165 VEC_ordered_remove (gimple, *stmts, i);
5166 else
5167 {
5168 *slot = (void *) stmt;
5169 i++;
5170 }
5171 }
5172
5173 htab_delete (seen);
5174 }
5175
5176 /* Returns the index of PARAMETER in the parameters vector of the
5177 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5178
5179 int
5180 access_matrix_get_index_for_parameter (tree parameter,
5181 struct access_matrix *access_matrix)
5182 {
5183 int i;
5184 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5185 tree lambda_parameter;
5186
5187 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5188 if (lambda_parameter == parameter)
5189 return i + AM_NB_INDUCTION_VARS (access_matrix);
5190
5191 return -1;
5192 }