re PR middle-end/52580 (171.swim performance drop on x86 – vectorization doesn’t...
[gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
31
32 The goals of this analysis are:
33
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
37
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
40
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
46
47 - to define a knowledge base for storing the data dependence
48 information,
49
50 - to define an interface to access this data.
51
52
53 Definitions:
54
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
64
65 References:
66
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
73
74
75 */
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
88 #include "params.h"
89
90 static struct datadep_stats
91 {
92 int num_dependence_tests;
93 int num_dependence_dependent;
94 int num_dependence_independent;
95 int num_dependence_undetermined;
96
97 int num_subscript_tests;
98 int num_subscript_undetermined;
99 int num_same_subscript_function;
100
101 int num_ziv;
102 int num_ziv_independent;
103 int num_ziv_dependent;
104 int num_ziv_unimplemented;
105
106 int num_siv;
107 int num_siv_independent;
108 int num_siv_dependent;
109 int num_siv_unimplemented;
110
111 int num_miv;
112 int num_miv_independent;
113 int num_miv_dependent;
114 int num_miv_unimplemented;
115 } dependence_stats;
116
117 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
118 struct data_reference *,
119 struct data_reference *,
120 struct loop *);
121 /* Returns true iff A divides B. */
122
123 static inline bool
124 tree_fold_divides_p (const_tree a, const_tree b)
125 {
126 gcc_assert (TREE_CODE (a) == INTEGER_CST);
127 gcc_assert (TREE_CODE (b) == INTEGER_CST);
128 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
129 }
130
131 /* Returns true iff A divides B. */
132
133 static inline bool
134 int_divides_p (int a, int b)
135 {
136 return ((b % a) == 0);
137 }
138
139 \f
140
141 /* Dump into FILE all the data references from DATAREFS. */
142
143 void
144 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
145 {
146 unsigned int i;
147 struct data_reference *dr;
148
149 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
150 dump_data_reference (file, dr);
151 }
152
153 /* Dump into STDERR all the data references from DATAREFS. */
154
155 DEBUG_FUNCTION void
156 debug_data_references (VEC (data_reference_p, heap) *datarefs)
157 {
158 dump_data_references (stderr, datarefs);
159 }
160
161 /* Dump to STDERR all the dependence relations from DDRS. */
162
163 DEBUG_FUNCTION void
164 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
165 {
166 dump_data_dependence_relations (stderr, ddrs);
167 }
168
169 /* Dump into FILE all the dependence relations from DDRS. */
170
171 void
172 dump_data_dependence_relations (FILE *file,
173 VEC (ddr_p, heap) *ddrs)
174 {
175 unsigned int i;
176 struct data_dependence_relation *ddr;
177
178 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
179 dump_data_dependence_relation (file, ddr);
180 }
181
182 /* Print to STDERR the data_reference DR. */
183
184 DEBUG_FUNCTION void
185 debug_data_reference (struct data_reference *dr)
186 {
187 dump_data_reference (stderr, dr);
188 }
189
190 /* Dump function for a DATA_REFERENCE structure. */
191
192 void
193 dump_data_reference (FILE *outf,
194 struct data_reference *dr)
195 {
196 unsigned int i;
197
198 fprintf (outf, "#(Data Ref: \n");
199 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
200 fprintf (outf, "# stmt: ");
201 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
202 fprintf (outf, "# ref: ");
203 print_generic_stmt (outf, DR_REF (dr), 0);
204 fprintf (outf, "# base_object: ");
205 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
206
207 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
208 {
209 fprintf (outf, "# Access function %d: ", i);
210 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
211 }
212 fprintf (outf, "#)\n");
213 }
214
215 /* Dumps the affine function described by FN to the file OUTF. */
216
217 static void
218 dump_affine_function (FILE *outf, affine_fn fn)
219 {
220 unsigned i;
221 tree coef;
222
223 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
224 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
225 {
226 fprintf (outf, " + ");
227 print_generic_expr (outf, coef, TDF_SLIM);
228 fprintf (outf, " * x_%u", i);
229 }
230 }
231
232 /* Dumps the conflict function CF to the file OUTF. */
233
234 static void
235 dump_conflict_function (FILE *outf, conflict_function *cf)
236 {
237 unsigned i;
238
239 if (cf->n == NO_DEPENDENCE)
240 fprintf (outf, "no dependence\n");
241 else if (cf->n == NOT_KNOWN)
242 fprintf (outf, "not known\n");
243 else
244 {
245 for (i = 0; i < cf->n; i++)
246 {
247 fprintf (outf, "[");
248 dump_affine_function (outf, cf->fns[i]);
249 fprintf (outf, "]\n");
250 }
251 }
252 }
253
254 /* Dump function for a SUBSCRIPT structure. */
255
256 void
257 dump_subscript (FILE *outf, struct subscript *subscript)
258 {
259 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
260
261 fprintf (outf, "\n (subscript \n");
262 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
263 dump_conflict_function (outf, cf);
264 if (CF_NONTRIVIAL_P (cf))
265 {
266 tree last_iteration = SUB_LAST_CONFLICT (subscript);
267 fprintf (outf, " last_conflict: ");
268 print_generic_stmt (outf, last_iteration, 0);
269 }
270
271 cf = SUB_CONFLICTS_IN_B (subscript);
272 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
273 dump_conflict_function (outf, cf);
274 if (CF_NONTRIVIAL_P (cf))
275 {
276 tree last_iteration = SUB_LAST_CONFLICT (subscript);
277 fprintf (outf, " last_conflict: ");
278 print_generic_stmt (outf, last_iteration, 0);
279 }
280
281 fprintf (outf, " (Subscript distance: ");
282 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
283 fprintf (outf, " )\n");
284 fprintf (outf, " )\n");
285 }
286
287 /* Print the classic direction vector DIRV to OUTF. */
288
289 void
290 print_direction_vector (FILE *outf,
291 lambda_vector dirv,
292 int length)
293 {
294 int eq;
295
296 for (eq = 0; eq < length; eq++)
297 {
298 enum data_dependence_direction dir = ((enum data_dependence_direction)
299 dirv[eq]);
300
301 switch (dir)
302 {
303 case dir_positive:
304 fprintf (outf, " +");
305 break;
306 case dir_negative:
307 fprintf (outf, " -");
308 break;
309 case dir_equal:
310 fprintf (outf, " =");
311 break;
312 case dir_positive_or_equal:
313 fprintf (outf, " +=");
314 break;
315 case dir_positive_or_negative:
316 fprintf (outf, " +-");
317 break;
318 case dir_negative_or_equal:
319 fprintf (outf, " -=");
320 break;
321 case dir_star:
322 fprintf (outf, " *");
323 break;
324 default:
325 fprintf (outf, "indep");
326 break;
327 }
328 }
329 fprintf (outf, "\n");
330 }
331
332 /* Print a vector of direction vectors. */
333
334 void
335 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
336 int length)
337 {
338 unsigned j;
339 lambda_vector v;
340
341 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
342 print_direction_vector (outf, v, length);
343 }
344
345 /* Print out a vector VEC of length N to OUTFILE. */
346
347 static inline void
348 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
349 {
350 int i;
351
352 for (i = 0; i < n; i++)
353 fprintf (outfile, "%3d ", vector[i]);
354 fprintf (outfile, "\n");
355 }
356
357 /* Print a vector of distance vectors. */
358
359 void
360 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
361 int length)
362 {
363 unsigned j;
364 lambda_vector v;
365
366 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
367 print_lambda_vector (outf, v, length);
368 }
369
370 /* Debug version. */
371
372 DEBUG_FUNCTION void
373 debug_data_dependence_relation (struct data_dependence_relation *ddr)
374 {
375 dump_data_dependence_relation (stderr, ddr);
376 }
377
378 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
379
380 void
381 dump_data_dependence_relation (FILE *outf,
382 struct data_dependence_relation *ddr)
383 {
384 struct data_reference *dra, *drb;
385
386 fprintf (outf, "(Data Dep: \n");
387
388 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
389 {
390 if (ddr)
391 {
392 dra = DDR_A (ddr);
393 drb = DDR_B (ddr);
394 if (dra)
395 dump_data_reference (outf, dra);
396 else
397 fprintf (outf, " (nil)\n");
398 if (drb)
399 dump_data_reference (outf, drb);
400 else
401 fprintf (outf, " (nil)\n");
402 }
403 fprintf (outf, " (don't know)\n)\n");
404 return;
405 }
406
407 dra = DDR_A (ddr);
408 drb = DDR_B (ddr);
409 dump_data_reference (outf, dra);
410 dump_data_reference (outf, drb);
411
412 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
413 fprintf (outf, " (no dependence)\n");
414
415 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
416 {
417 unsigned int i;
418 struct loop *loopi;
419
420 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
421 {
422 fprintf (outf, " access_fn_A: ");
423 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
424 fprintf (outf, " access_fn_B: ");
425 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
426 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
427 }
428
429 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
430 fprintf (outf, " loop nest: (");
431 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
432 fprintf (outf, "%d ", loopi->num);
433 fprintf (outf, ")\n");
434
435 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
436 {
437 fprintf (outf, " distance_vector: ");
438 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
439 DDR_NB_LOOPS (ddr));
440 }
441
442 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
443 {
444 fprintf (outf, " direction_vector: ");
445 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
446 DDR_NB_LOOPS (ddr));
447 }
448 }
449
450 fprintf (outf, ")\n");
451 }
452
453 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
454
455 void
456 dump_data_dependence_direction (FILE *file,
457 enum data_dependence_direction dir)
458 {
459 switch (dir)
460 {
461 case dir_positive:
462 fprintf (file, "+");
463 break;
464
465 case dir_negative:
466 fprintf (file, "-");
467 break;
468
469 case dir_equal:
470 fprintf (file, "=");
471 break;
472
473 case dir_positive_or_negative:
474 fprintf (file, "+-");
475 break;
476
477 case dir_positive_or_equal:
478 fprintf (file, "+=");
479 break;
480
481 case dir_negative_or_equal:
482 fprintf (file, "-=");
483 break;
484
485 case dir_star:
486 fprintf (file, "*");
487 break;
488
489 default:
490 break;
491 }
492 }
493
494 /* Dumps the distance and direction vectors in FILE. DDRS contains
495 the dependence relations, and VECT_SIZE is the size of the
496 dependence vectors, or in other words the number of loops in the
497 considered nest. */
498
499 void
500 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
501 {
502 unsigned int i, j;
503 struct data_dependence_relation *ddr;
504 lambda_vector v;
505
506 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
507 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
508 {
509 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
510 {
511 fprintf (file, "DISTANCE_V (");
512 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
513 fprintf (file, ")\n");
514 }
515
516 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
517 {
518 fprintf (file, "DIRECTION_V (");
519 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
520 fprintf (file, ")\n");
521 }
522 }
523
524 fprintf (file, "\n\n");
525 }
526
527 /* Dumps the data dependence relations DDRS in FILE. */
528
529 void
530 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
531 {
532 unsigned int i;
533 struct data_dependence_relation *ddr;
534
535 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
536 dump_data_dependence_relation (file, ddr);
537
538 fprintf (file, "\n\n");
539 }
540
541 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
542 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
543 constant of type ssizetype, and returns true. If we cannot do this
544 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
545 is returned. */
546
547 static bool
548 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
549 tree *var, tree *off)
550 {
551 tree var0, var1;
552 tree off0, off1;
553 enum tree_code ocode = code;
554
555 *var = NULL_TREE;
556 *off = NULL_TREE;
557
558 switch (code)
559 {
560 case INTEGER_CST:
561 *var = build_int_cst (type, 0);
562 *off = fold_convert (ssizetype, op0);
563 return true;
564
565 case POINTER_PLUS_EXPR:
566 ocode = PLUS_EXPR;
567 /* FALLTHROUGH */
568 case PLUS_EXPR:
569 case MINUS_EXPR:
570 split_constant_offset (op0, &var0, &off0);
571 split_constant_offset (op1, &var1, &off1);
572 *var = fold_build2 (code, type, var0, var1);
573 *off = size_binop (ocode, off0, off1);
574 return true;
575
576 case MULT_EXPR:
577 if (TREE_CODE (op1) != INTEGER_CST)
578 return false;
579
580 split_constant_offset (op0, &var0, &off0);
581 *var = fold_build2 (MULT_EXPR, type, var0, op1);
582 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
583 return true;
584
585 case ADDR_EXPR:
586 {
587 tree base, poffset;
588 HOST_WIDE_INT pbitsize, pbitpos;
589 enum machine_mode pmode;
590 int punsignedp, pvolatilep;
591
592 op0 = TREE_OPERAND (op0, 0);
593 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
594 &pmode, &punsignedp, &pvolatilep, false);
595
596 if (pbitpos % BITS_PER_UNIT != 0)
597 return false;
598 base = build_fold_addr_expr (base);
599 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
600
601 if (poffset)
602 {
603 split_constant_offset (poffset, &poffset, &off1);
604 off0 = size_binop (PLUS_EXPR, off0, off1);
605 if (POINTER_TYPE_P (TREE_TYPE (base)))
606 base = fold_build_pointer_plus (base, poffset);
607 else
608 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
609 fold_convert (TREE_TYPE (base), poffset));
610 }
611
612 var0 = fold_convert (type, base);
613
614 /* If variable length types are involved, punt, otherwise casts
615 might be converted into ARRAY_REFs in gimplify_conversion.
616 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
617 possibly no longer appears in current GIMPLE, might resurface.
618 This perhaps could run
619 if (CONVERT_EXPR_P (var0))
620 {
621 gimplify_conversion (&var0);
622 // Attempt to fill in any within var0 found ARRAY_REF's
623 // element size from corresponding op embedded ARRAY_REF,
624 // if unsuccessful, just punt.
625 } */
626 while (POINTER_TYPE_P (type))
627 type = TREE_TYPE (type);
628 if (int_size_in_bytes (type) < 0)
629 return false;
630
631 *var = var0;
632 *off = off0;
633 return true;
634 }
635
636 case SSA_NAME:
637 {
638 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
639 enum tree_code subcode;
640
641 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
642 return false;
643
644 var0 = gimple_assign_rhs1 (def_stmt);
645 subcode = gimple_assign_rhs_code (def_stmt);
646 var1 = gimple_assign_rhs2 (def_stmt);
647
648 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
649 }
650 CASE_CONVERT:
651 {
652 /* We must not introduce undefined overflow, and we must not change the value.
653 Hence we're okay if the inner type doesn't overflow to start with
654 (pointer or signed), the outer type also is an integer or pointer
655 and the outer precision is at least as large as the inner. */
656 tree itype = TREE_TYPE (op0);
657 if ((POINTER_TYPE_P (itype)
658 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
659 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
660 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
661 {
662 split_constant_offset (op0, &var0, off);
663 *var = fold_convert (type, var0);
664 return true;
665 }
666 return false;
667 }
668
669 default:
670 return false;
671 }
672 }
673
674 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
675 will be ssizetype. */
676
677 void
678 split_constant_offset (tree exp, tree *var, tree *off)
679 {
680 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
681 enum tree_code code;
682
683 *var = exp;
684 *off = ssize_int (0);
685 STRIP_NOPS (exp);
686
687 if (tree_is_chrec (exp)
688 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
689 return;
690
691 otype = TREE_TYPE (exp);
692 code = TREE_CODE (exp);
693 extract_ops_from_tree (exp, &code, &op0, &op1);
694 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
695 {
696 *var = fold_convert (type, e);
697 *off = o;
698 }
699 }
700
701 /* Returns the address ADDR of an object in a canonical shape (without nop
702 casts, and with type of pointer to the object). */
703
704 static tree
705 canonicalize_base_object_address (tree addr)
706 {
707 tree orig = addr;
708
709 STRIP_NOPS (addr);
710
711 /* The base address may be obtained by casting from integer, in that case
712 keep the cast. */
713 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
714 return orig;
715
716 if (TREE_CODE (addr) != ADDR_EXPR)
717 return addr;
718
719 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
720 }
721
722 /* Analyzes the behavior of the memory reference DR in the innermost loop or
723 basic block that contains it. Returns true if analysis succeed or false
724 otherwise. */
725
726 bool
727 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
728 {
729 gimple stmt = DR_STMT (dr);
730 struct loop *loop = loop_containing_stmt (stmt);
731 tree ref = DR_REF (dr);
732 HOST_WIDE_INT pbitsize, pbitpos;
733 tree base, poffset;
734 enum machine_mode pmode;
735 int punsignedp, pvolatilep;
736 affine_iv base_iv, offset_iv;
737 tree init, dinit, step;
738 bool in_loop = (loop && loop->num);
739
740 if (dump_file && (dump_flags & TDF_DETAILS))
741 fprintf (dump_file, "analyze_innermost: ");
742
743 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
744 &pmode, &punsignedp, &pvolatilep, false);
745 gcc_assert (base != NULL_TREE);
746
747 if (pbitpos % BITS_PER_UNIT != 0)
748 {
749 if (dump_file && (dump_flags & TDF_DETAILS))
750 fprintf (dump_file, "failed: bit offset alignment.\n");
751 return false;
752 }
753
754 if (TREE_CODE (base) == MEM_REF)
755 {
756 if (!integer_zerop (TREE_OPERAND (base, 1)))
757 {
758 if (!poffset)
759 {
760 double_int moff = mem_ref_offset (base);
761 poffset = double_int_to_tree (sizetype, moff);
762 }
763 else
764 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
765 }
766 base = TREE_OPERAND (base, 0);
767 }
768 else
769 base = build_fold_addr_expr (base);
770
771 if (in_loop)
772 {
773 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
774 false))
775 {
776 if (nest)
777 {
778 if (dump_file && (dump_flags & TDF_DETAILS))
779 fprintf (dump_file, "failed: evolution of base is not"
780 " affine.\n");
781 return false;
782 }
783 else
784 {
785 base_iv.base = base;
786 base_iv.step = ssize_int (0);
787 base_iv.no_overflow = true;
788 }
789 }
790 }
791 else
792 {
793 base_iv.base = base;
794 base_iv.step = ssize_int (0);
795 base_iv.no_overflow = true;
796 }
797
798 if (!poffset)
799 {
800 offset_iv.base = ssize_int (0);
801 offset_iv.step = ssize_int (0);
802 }
803 else
804 {
805 if (!in_loop)
806 {
807 offset_iv.base = poffset;
808 offset_iv.step = ssize_int (0);
809 }
810 else if (!simple_iv (loop, loop_containing_stmt (stmt),
811 poffset, &offset_iv, false))
812 {
813 if (nest)
814 {
815 if (dump_file && (dump_flags & TDF_DETAILS))
816 fprintf (dump_file, "failed: evolution of offset is not"
817 " affine.\n");
818 return false;
819 }
820 else
821 {
822 offset_iv.base = poffset;
823 offset_iv.step = ssize_int (0);
824 }
825 }
826 }
827
828 init = ssize_int (pbitpos / BITS_PER_UNIT);
829 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
830 init = size_binop (PLUS_EXPR, init, dinit);
831 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
832 init = size_binop (PLUS_EXPR, init, dinit);
833
834 step = size_binop (PLUS_EXPR,
835 fold_convert (ssizetype, base_iv.step),
836 fold_convert (ssizetype, offset_iv.step));
837
838 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
839
840 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
841 DR_INIT (dr) = init;
842 DR_STEP (dr) = step;
843
844 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
845
846 if (dump_file && (dump_flags & TDF_DETAILS))
847 fprintf (dump_file, "success.\n");
848
849 return true;
850 }
851
852 /* Determines the base object and the list of indices of memory reference
853 DR, analyzed in LOOP and instantiated in loop nest NEST. */
854
855 static void
856 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
857 {
858 VEC (tree, heap) *access_fns = NULL;
859 tree ref, op;
860 tree base, off, access_fn;
861 basic_block before_loop;
862
863 /* If analyzing a basic-block there are no indices to analyze
864 and thus no access functions. */
865 if (!nest)
866 {
867 DR_BASE_OBJECT (dr) = DR_REF (dr);
868 DR_ACCESS_FNS (dr) = NULL;
869 return;
870 }
871
872 ref = DR_REF (dr);
873 before_loop = block_before_loop (nest);
874
875 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
876 into a two element array with a constant index. The base is
877 then just the immediate underlying object. */
878 if (TREE_CODE (ref) == REALPART_EXPR)
879 {
880 ref = TREE_OPERAND (ref, 0);
881 VEC_safe_push (tree, heap, access_fns, integer_zero_node);
882 }
883 else if (TREE_CODE (ref) == IMAGPART_EXPR)
884 {
885 ref = TREE_OPERAND (ref, 0);
886 VEC_safe_push (tree, heap, access_fns, integer_one_node);
887 }
888
889 /* Analyze access functions of dimensions we know to be independent. */
890 while (handled_component_p (ref))
891 {
892 if (TREE_CODE (ref) == ARRAY_REF)
893 {
894 op = TREE_OPERAND (ref, 1);
895 access_fn = analyze_scalar_evolution (loop, op);
896 access_fn = instantiate_scev (before_loop, loop, access_fn);
897 VEC_safe_push (tree, heap, access_fns, access_fn);
898 }
899 else if (TREE_CODE (ref) == COMPONENT_REF
900 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
901 {
902 /* For COMPONENT_REFs of records (but not unions!) use the
903 FIELD_DECL offset as constant access function so we can
904 disambiguate a[i].f1 and a[i].f2. */
905 tree off = component_ref_field_offset (ref);
906 off = size_binop (PLUS_EXPR,
907 size_binop (MULT_EXPR,
908 fold_convert (bitsizetype, off),
909 bitsize_int (BITS_PER_UNIT)),
910 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
911 VEC_safe_push (tree, heap, access_fns, off);
912 }
913 else
914 /* If we have an unhandled component we could not translate
915 to an access function stop analyzing. We have determined
916 our base object in this case. */
917 break;
918
919 ref = TREE_OPERAND (ref, 0);
920 }
921
922 /* If the address operand of a MEM_REF base has an evolution in the
923 analyzed nest, add it as an additional independent access-function. */
924 if (TREE_CODE (ref) == MEM_REF)
925 {
926 op = TREE_OPERAND (ref, 0);
927 access_fn = analyze_scalar_evolution (loop, op);
928 access_fn = instantiate_scev (before_loop, loop, access_fn);
929 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
930 {
931 tree orig_type;
932 tree memoff = TREE_OPERAND (ref, 1);
933 base = initial_condition (access_fn);
934 orig_type = TREE_TYPE (base);
935 STRIP_USELESS_TYPE_CONVERSION (base);
936 split_constant_offset (base, &base, &off);
937 /* Fold the MEM_REF offset into the evolutions initial
938 value to make more bases comparable. */
939 if (!integer_zerop (memoff))
940 {
941 off = size_binop (PLUS_EXPR, off,
942 fold_convert (ssizetype, memoff));
943 memoff = build_int_cst (TREE_TYPE (memoff), 0);
944 }
945 access_fn = chrec_replace_initial_condition
946 (access_fn, fold_convert (orig_type, off));
947 /* ??? This is still not a suitable base object for
948 dr_may_alias_p - the base object needs to be an
949 access that covers the object as whole. With
950 an evolution in the pointer this cannot be
951 guaranteed.
952 As a band-aid, mark the access so we can special-case
953 it in dr_may_alias_p. */
954 ref = fold_build2_loc (EXPR_LOCATION (ref),
955 MEM_REF, TREE_TYPE (ref),
956 base, memoff);
957 DR_UNCONSTRAINED_BASE (dr) = true;
958 VEC_safe_push (tree, heap, access_fns, access_fn);
959 }
960 }
961 else if (DECL_P (ref))
962 {
963 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
964 ref = build2 (MEM_REF, TREE_TYPE (ref),
965 build_fold_addr_expr (ref),
966 build_int_cst (reference_alias_ptr_type (ref), 0));
967 }
968
969 DR_BASE_OBJECT (dr) = ref;
970 DR_ACCESS_FNS (dr) = access_fns;
971 }
972
973 /* Extracts the alias analysis information from the memory reference DR. */
974
975 static void
976 dr_analyze_alias (struct data_reference *dr)
977 {
978 tree ref = DR_REF (dr);
979 tree base = get_base_address (ref), addr;
980
981 if (INDIRECT_REF_P (base)
982 || TREE_CODE (base) == MEM_REF)
983 {
984 addr = TREE_OPERAND (base, 0);
985 if (TREE_CODE (addr) == SSA_NAME)
986 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
987 }
988 }
989
990 /* Frees data reference DR. */
991
992 void
993 free_data_ref (data_reference_p dr)
994 {
995 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
996 free (dr);
997 }
998
999 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1000 is read if IS_READ is true, write otherwise. Returns the
1001 data_reference description of MEMREF. NEST is the outermost loop
1002 in which the reference should be instantiated, LOOP is the loop in
1003 which the data reference should be analyzed. */
1004
1005 struct data_reference *
1006 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
1007 bool is_read)
1008 {
1009 struct data_reference *dr;
1010
1011 if (dump_file && (dump_flags & TDF_DETAILS))
1012 {
1013 fprintf (dump_file, "Creating dr for ");
1014 print_generic_expr (dump_file, memref, TDF_SLIM);
1015 fprintf (dump_file, "\n");
1016 }
1017
1018 dr = XCNEW (struct data_reference);
1019 DR_STMT (dr) = stmt;
1020 DR_REF (dr) = memref;
1021 DR_IS_READ (dr) = is_read;
1022
1023 dr_analyze_innermost (dr, nest);
1024 dr_analyze_indices (dr, nest, loop);
1025 dr_analyze_alias (dr);
1026
1027 if (dump_file && (dump_flags & TDF_DETAILS))
1028 {
1029 unsigned i;
1030 fprintf (dump_file, "\tbase_address: ");
1031 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1032 fprintf (dump_file, "\n\toffset from base address: ");
1033 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1034 fprintf (dump_file, "\n\tconstant offset from base address: ");
1035 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1036 fprintf (dump_file, "\n\tstep: ");
1037 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1038 fprintf (dump_file, "\n\taligned to: ");
1039 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1040 fprintf (dump_file, "\n\tbase_object: ");
1041 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1042 fprintf (dump_file, "\n");
1043 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1044 {
1045 fprintf (dump_file, "\tAccess function %d: ", i);
1046 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1047 }
1048 }
1049
1050 return dr;
1051 }
1052
1053 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1054 expressions. */
1055 static bool
1056 dr_equal_offsets_p1 (tree offset1, tree offset2)
1057 {
1058 bool res;
1059
1060 STRIP_NOPS (offset1);
1061 STRIP_NOPS (offset2);
1062
1063 if (offset1 == offset2)
1064 return true;
1065
1066 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1067 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1068 return false;
1069
1070 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1071 TREE_OPERAND (offset2, 0));
1072
1073 if (!res || !BINARY_CLASS_P (offset1))
1074 return res;
1075
1076 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1077 TREE_OPERAND (offset2, 1));
1078
1079 return res;
1080 }
1081
1082 /* Check if DRA and DRB have equal offsets. */
1083 bool
1084 dr_equal_offsets_p (struct data_reference *dra,
1085 struct data_reference *drb)
1086 {
1087 tree offset1, offset2;
1088
1089 offset1 = DR_OFFSET (dra);
1090 offset2 = DR_OFFSET (drb);
1091
1092 return dr_equal_offsets_p1 (offset1, offset2);
1093 }
1094
1095 /* Returns true if FNA == FNB. */
1096
1097 static bool
1098 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1099 {
1100 unsigned i, n = VEC_length (tree, fna);
1101
1102 if (n != VEC_length (tree, fnb))
1103 return false;
1104
1105 for (i = 0; i < n; i++)
1106 if (!operand_equal_p (VEC_index (tree, fna, i),
1107 VEC_index (tree, fnb, i), 0))
1108 return false;
1109
1110 return true;
1111 }
1112
1113 /* If all the functions in CF are the same, returns one of them,
1114 otherwise returns NULL. */
1115
1116 static affine_fn
1117 common_affine_function (conflict_function *cf)
1118 {
1119 unsigned i;
1120 affine_fn comm;
1121
1122 if (!CF_NONTRIVIAL_P (cf))
1123 return NULL;
1124
1125 comm = cf->fns[0];
1126
1127 for (i = 1; i < cf->n; i++)
1128 if (!affine_function_equal_p (comm, cf->fns[i]))
1129 return NULL;
1130
1131 return comm;
1132 }
1133
1134 /* Returns the base of the affine function FN. */
1135
1136 static tree
1137 affine_function_base (affine_fn fn)
1138 {
1139 return VEC_index (tree, fn, 0);
1140 }
1141
1142 /* Returns true if FN is a constant. */
1143
1144 static bool
1145 affine_function_constant_p (affine_fn fn)
1146 {
1147 unsigned i;
1148 tree coef;
1149
1150 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1151 if (!integer_zerop (coef))
1152 return false;
1153
1154 return true;
1155 }
1156
1157 /* Returns true if FN is the zero constant function. */
1158
1159 static bool
1160 affine_function_zero_p (affine_fn fn)
1161 {
1162 return (integer_zerop (affine_function_base (fn))
1163 && affine_function_constant_p (fn));
1164 }
1165
1166 /* Returns a signed integer type with the largest precision from TA
1167 and TB. */
1168
1169 static tree
1170 signed_type_for_types (tree ta, tree tb)
1171 {
1172 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1173 return signed_type_for (ta);
1174 else
1175 return signed_type_for (tb);
1176 }
1177
1178 /* Applies operation OP on affine functions FNA and FNB, and returns the
1179 result. */
1180
1181 static affine_fn
1182 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1183 {
1184 unsigned i, n, m;
1185 affine_fn ret;
1186 tree coef;
1187
1188 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1189 {
1190 n = VEC_length (tree, fna);
1191 m = VEC_length (tree, fnb);
1192 }
1193 else
1194 {
1195 n = VEC_length (tree, fnb);
1196 m = VEC_length (tree, fna);
1197 }
1198
1199 ret = VEC_alloc (tree, heap, m);
1200 for (i = 0; i < n; i++)
1201 {
1202 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1203 TREE_TYPE (VEC_index (tree, fnb, i)));
1204
1205 VEC_quick_push (tree, ret,
1206 fold_build2 (op, type,
1207 VEC_index (tree, fna, i),
1208 VEC_index (tree, fnb, i)));
1209 }
1210
1211 for (; VEC_iterate (tree, fna, i, coef); i++)
1212 VEC_quick_push (tree, ret,
1213 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1214 coef, integer_zero_node));
1215 for (; VEC_iterate (tree, fnb, i, coef); i++)
1216 VEC_quick_push (tree, ret,
1217 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1218 integer_zero_node, coef));
1219
1220 return ret;
1221 }
1222
1223 /* Returns the sum of affine functions FNA and FNB. */
1224
1225 static affine_fn
1226 affine_fn_plus (affine_fn fna, affine_fn fnb)
1227 {
1228 return affine_fn_op (PLUS_EXPR, fna, fnb);
1229 }
1230
1231 /* Returns the difference of affine functions FNA and FNB. */
1232
1233 static affine_fn
1234 affine_fn_minus (affine_fn fna, affine_fn fnb)
1235 {
1236 return affine_fn_op (MINUS_EXPR, fna, fnb);
1237 }
1238
1239 /* Frees affine function FN. */
1240
1241 static void
1242 affine_fn_free (affine_fn fn)
1243 {
1244 VEC_free (tree, heap, fn);
1245 }
1246
1247 /* Determine for each subscript in the data dependence relation DDR
1248 the distance. */
1249
1250 static void
1251 compute_subscript_distance (struct data_dependence_relation *ddr)
1252 {
1253 conflict_function *cf_a, *cf_b;
1254 affine_fn fn_a, fn_b, diff;
1255
1256 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1257 {
1258 unsigned int i;
1259
1260 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1261 {
1262 struct subscript *subscript;
1263
1264 subscript = DDR_SUBSCRIPT (ddr, i);
1265 cf_a = SUB_CONFLICTS_IN_A (subscript);
1266 cf_b = SUB_CONFLICTS_IN_B (subscript);
1267
1268 fn_a = common_affine_function (cf_a);
1269 fn_b = common_affine_function (cf_b);
1270 if (!fn_a || !fn_b)
1271 {
1272 SUB_DISTANCE (subscript) = chrec_dont_know;
1273 return;
1274 }
1275 diff = affine_fn_minus (fn_a, fn_b);
1276
1277 if (affine_function_constant_p (diff))
1278 SUB_DISTANCE (subscript) = affine_function_base (diff);
1279 else
1280 SUB_DISTANCE (subscript) = chrec_dont_know;
1281
1282 affine_fn_free (diff);
1283 }
1284 }
1285 }
1286
1287 /* Returns the conflict function for "unknown". */
1288
1289 static conflict_function *
1290 conflict_fn_not_known (void)
1291 {
1292 conflict_function *fn = XCNEW (conflict_function);
1293 fn->n = NOT_KNOWN;
1294
1295 return fn;
1296 }
1297
1298 /* Returns the conflict function for "independent". */
1299
1300 static conflict_function *
1301 conflict_fn_no_dependence (void)
1302 {
1303 conflict_function *fn = XCNEW (conflict_function);
1304 fn->n = NO_DEPENDENCE;
1305
1306 return fn;
1307 }
1308
1309 /* Returns true if the address of OBJ is invariant in LOOP. */
1310
1311 static bool
1312 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1313 {
1314 while (handled_component_p (obj))
1315 {
1316 if (TREE_CODE (obj) == ARRAY_REF)
1317 {
1318 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1319 need to check the stride and the lower bound of the reference. */
1320 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1321 loop->num)
1322 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1323 loop->num))
1324 return false;
1325 }
1326 else if (TREE_CODE (obj) == COMPONENT_REF)
1327 {
1328 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1329 loop->num))
1330 return false;
1331 }
1332 obj = TREE_OPERAND (obj, 0);
1333 }
1334
1335 if (!INDIRECT_REF_P (obj)
1336 && TREE_CODE (obj) != MEM_REF)
1337 return true;
1338
1339 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1340 loop->num);
1341 }
1342
1343 /* Returns false if we can prove that data references A and B do not alias,
1344 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1345 considered. */
1346
1347 bool
1348 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1349 bool loop_nest)
1350 {
1351 tree addr_a = DR_BASE_OBJECT (a);
1352 tree addr_b = DR_BASE_OBJECT (b);
1353
1354 /* If we are not processing a loop nest but scalar code we
1355 do not need to care about possible cross-iteration dependences
1356 and thus can process the full original reference. Do so,
1357 similar to how loop invariant motion applies extra offset-based
1358 disambiguation. */
1359 if (!loop_nest)
1360 {
1361 aff_tree off1, off2;
1362 double_int size1, size2;
1363 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1364 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1365 aff_combination_scale (&off1, double_int_minus_one);
1366 aff_combination_add (&off2, &off1);
1367 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1368 return false;
1369 }
1370
1371 /* If we had an evolution in a MEM_REF BASE_OBJECT we do not know
1372 the size of the base-object. So we cannot do any offset/overlap
1373 based analysis but have to rely on points-to information only. */
1374 if (TREE_CODE (addr_a) == MEM_REF
1375 && DR_UNCONSTRAINED_BASE (a))
1376 {
1377 if (TREE_CODE (addr_b) == MEM_REF
1378 && DR_UNCONSTRAINED_BASE (b))
1379 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1380 TREE_OPERAND (addr_b, 0));
1381 else
1382 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1383 build_fold_addr_expr (addr_b));
1384 }
1385 else if (TREE_CODE (addr_b) == MEM_REF
1386 && DR_UNCONSTRAINED_BASE (b))
1387 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1388 TREE_OPERAND (addr_b, 0));
1389
1390 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1391 that is being subsetted in the loop nest. */
1392 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1393 return refs_output_dependent_p (addr_a, addr_b);
1394 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1395 return refs_anti_dependent_p (addr_a, addr_b);
1396 return refs_may_alias_p (addr_a, addr_b);
1397 }
1398
1399 /* Initialize a data dependence relation between data accesses A and
1400 B. NB_LOOPS is the number of loops surrounding the references: the
1401 size of the classic distance/direction vectors. */
1402
1403 struct data_dependence_relation *
1404 initialize_data_dependence_relation (struct data_reference *a,
1405 struct data_reference *b,
1406 VEC (loop_p, heap) *loop_nest)
1407 {
1408 struct data_dependence_relation *res;
1409 unsigned int i;
1410
1411 res = XNEW (struct data_dependence_relation);
1412 DDR_A (res) = a;
1413 DDR_B (res) = b;
1414 DDR_LOOP_NEST (res) = NULL;
1415 DDR_REVERSED_P (res) = false;
1416 DDR_SUBSCRIPTS (res) = NULL;
1417 DDR_DIR_VECTS (res) = NULL;
1418 DDR_DIST_VECTS (res) = NULL;
1419
1420 if (a == NULL || b == NULL)
1421 {
1422 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1423 return res;
1424 }
1425
1426 /* If the data references do not alias, then they are independent. */
1427 if (!dr_may_alias_p (a, b, loop_nest != NULL))
1428 {
1429 DDR_ARE_DEPENDENT (res) = chrec_known;
1430 return res;
1431 }
1432
1433 /* The case where the references are exactly the same. */
1434 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1435 {
1436 if (loop_nest
1437 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1438 DR_BASE_OBJECT (a)))
1439 {
1440 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1441 return res;
1442 }
1443 DDR_AFFINE_P (res) = true;
1444 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1445 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1446 DDR_LOOP_NEST (res) = loop_nest;
1447 DDR_INNER_LOOP (res) = 0;
1448 DDR_SELF_REFERENCE (res) = true;
1449 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1450 {
1451 struct subscript *subscript;
1452
1453 subscript = XNEW (struct subscript);
1454 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1455 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1456 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1457 SUB_DISTANCE (subscript) = chrec_dont_know;
1458 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1459 }
1460 return res;
1461 }
1462
1463 /* If the references do not access the same object, we do not know
1464 whether they alias or not. */
1465 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1466 {
1467 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1468 return res;
1469 }
1470
1471 /* If the base of the object is not invariant in the loop nest, we cannot
1472 analyze it. TODO -- in fact, it would suffice to record that there may
1473 be arbitrary dependences in the loops where the base object varies. */
1474 if (loop_nest
1475 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1476 DR_BASE_OBJECT (a)))
1477 {
1478 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1479 return res;
1480 }
1481
1482 /* If the number of dimensions of the access to not agree we can have
1483 a pointer access to a component of the array element type and an
1484 array access while the base-objects are still the same. Punt. */
1485 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1486 {
1487 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1488 return res;
1489 }
1490
1491 DDR_AFFINE_P (res) = true;
1492 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1493 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1494 DDR_LOOP_NEST (res) = loop_nest;
1495 DDR_INNER_LOOP (res) = 0;
1496 DDR_SELF_REFERENCE (res) = false;
1497
1498 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1499 {
1500 struct subscript *subscript;
1501
1502 subscript = XNEW (struct subscript);
1503 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1504 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1505 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1506 SUB_DISTANCE (subscript) = chrec_dont_know;
1507 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1508 }
1509
1510 return res;
1511 }
1512
1513 /* Frees memory used by the conflict function F. */
1514
1515 static void
1516 free_conflict_function (conflict_function *f)
1517 {
1518 unsigned i;
1519
1520 if (CF_NONTRIVIAL_P (f))
1521 {
1522 for (i = 0; i < f->n; i++)
1523 affine_fn_free (f->fns[i]);
1524 }
1525 free (f);
1526 }
1527
1528 /* Frees memory used by SUBSCRIPTS. */
1529
1530 static void
1531 free_subscripts (VEC (subscript_p, heap) *subscripts)
1532 {
1533 unsigned i;
1534 subscript_p s;
1535
1536 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1537 {
1538 free_conflict_function (s->conflicting_iterations_in_a);
1539 free_conflict_function (s->conflicting_iterations_in_b);
1540 free (s);
1541 }
1542 VEC_free (subscript_p, heap, subscripts);
1543 }
1544
1545 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1546 description. */
1547
1548 static inline void
1549 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1550 tree chrec)
1551 {
1552 if (dump_file && (dump_flags & TDF_DETAILS))
1553 {
1554 fprintf (dump_file, "(dependence classified: ");
1555 print_generic_expr (dump_file, chrec, 0);
1556 fprintf (dump_file, ")\n");
1557 }
1558
1559 DDR_ARE_DEPENDENT (ddr) = chrec;
1560 free_subscripts (DDR_SUBSCRIPTS (ddr));
1561 DDR_SUBSCRIPTS (ddr) = NULL;
1562 }
1563
1564 /* The dependence relation DDR cannot be represented by a distance
1565 vector. */
1566
1567 static inline void
1568 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1569 {
1570 if (dump_file && (dump_flags & TDF_DETAILS))
1571 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1572
1573 DDR_AFFINE_P (ddr) = false;
1574 }
1575
1576 \f
1577
1578 /* This section contains the classic Banerjee tests. */
1579
1580 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1581 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1582
1583 static inline bool
1584 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1585 {
1586 return (evolution_function_is_constant_p (chrec_a)
1587 && evolution_function_is_constant_p (chrec_b));
1588 }
1589
1590 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1591 variable, i.e., if the SIV (Single Index Variable) test is true. */
1592
1593 static bool
1594 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1595 {
1596 if ((evolution_function_is_constant_p (chrec_a)
1597 && evolution_function_is_univariate_p (chrec_b))
1598 || (evolution_function_is_constant_p (chrec_b)
1599 && evolution_function_is_univariate_p (chrec_a)))
1600 return true;
1601
1602 if (evolution_function_is_univariate_p (chrec_a)
1603 && evolution_function_is_univariate_p (chrec_b))
1604 {
1605 switch (TREE_CODE (chrec_a))
1606 {
1607 case POLYNOMIAL_CHREC:
1608 switch (TREE_CODE (chrec_b))
1609 {
1610 case POLYNOMIAL_CHREC:
1611 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1612 return false;
1613
1614 default:
1615 return true;
1616 }
1617
1618 default:
1619 return true;
1620 }
1621 }
1622
1623 return false;
1624 }
1625
1626 /* Creates a conflict function with N dimensions. The affine functions
1627 in each dimension follow. */
1628
1629 static conflict_function *
1630 conflict_fn (unsigned n, ...)
1631 {
1632 unsigned i;
1633 conflict_function *ret = XCNEW (conflict_function);
1634 va_list ap;
1635
1636 gcc_assert (0 < n && n <= MAX_DIM);
1637 va_start(ap, n);
1638
1639 ret->n = n;
1640 for (i = 0; i < n; i++)
1641 ret->fns[i] = va_arg (ap, affine_fn);
1642 va_end(ap);
1643
1644 return ret;
1645 }
1646
1647 /* Returns constant affine function with value CST. */
1648
1649 static affine_fn
1650 affine_fn_cst (tree cst)
1651 {
1652 affine_fn fn = VEC_alloc (tree, heap, 1);
1653 VEC_quick_push (tree, fn, cst);
1654 return fn;
1655 }
1656
1657 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1658
1659 static affine_fn
1660 affine_fn_univar (tree cst, unsigned dim, tree coef)
1661 {
1662 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1663 unsigned i;
1664
1665 gcc_assert (dim > 0);
1666 VEC_quick_push (tree, fn, cst);
1667 for (i = 1; i < dim; i++)
1668 VEC_quick_push (tree, fn, integer_zero_node);
1669 VEC_quick_push (tree, fn, coef);
1670 return fn;
1671 }
1672
1673 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1674 *OVERLAPS_B are initialized to the functions that describe the
1675 relation between the elements accessed twice by CHREC_A and
1676 CHREC_B. For k >= 0, the following property is verified:
1677
1678 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1679
1680 static void
1681 analyze_ziv_subscript (tree chrec_a,
1682 tree chrec_b,
1683 conflict_function **overlaps_a,
1684 conflict_function **overlaps_b,
1685 tree *last_conflicts)
1686 {
1687 tree type, difference;
1688 dependence_stats.num_ziv++;
1689
1690 if (dump_file && (dump_flags & TDF_DETAILS))
1691 fprintf (dump_file, "(analyze_ziv_subscript \n");
1692
1693 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1694 chrec_a = chrec_convert (type, chrec_a, NULL);
1695 chrec_b = chrec_convert (type, chrec_b, NULL);
1696 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1697
1698 switch (TREE_CODE (difference))
1699 {
1700 case INTEGER_CST:
1701 if (integer_zerop (difference))
1702 {
1703 /* The difference is equal to zero: the accessed index
1704 overlaps for each iteration in the loop. */
1705 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1706 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1707 *last_conflicts = chrec_dont_know;
1708 dependence_stats.num_ziv_dependent++;
1709 }
1710 else
1711 {
1712 /* The accesses do not overlap. */
1713 *overlaps_a = conflict_fn_no_dependence ();
1714 *overlaps_b = conflict_fn_no_dependence ();
1715 *last_conflicts = integer_zero_node;
1716 dependence_stats.num_ziv_independent++;
1717 }
1718 break;
1719
1720 default:
1721 /* We're not sure whether the indexes overlap. For the moment,
1722 conservatively answer "don't know". */
1723 if (dump_file && (dump_flags & TDF_DETAILS))
1724 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1725
1726 *overlaps_a = conflict_fn_not_known ();
1727 *overlaps_b = conflict_fn_not_known ();
1728 *last_conflicts = chrec_dont_know;
1729 dependence_stats.num_ziv_unimplemented++;
1730 break;
1731 }
1732
1733 if (dump_file && (dump_flags & TDF_DETAILS))
1734 fprintf (dump_file, ")\n");
1735 }
1736
1737 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1738 and only if it fits to the int type. If this is not the case, or the
1739 bound on the number of iterations of LOOP could not be derived, returns
1740 chrec_dont_know. */
1741
1742 static tree
1743 max_stmt_executions_tree (struct loop *loop)
1744 {
1745 double_int nit;
1746
1747 if (!max_stmt_executions (loop, true, &nit))
1748 return chrec_dont_know;
1749
1750 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1751 return chrec_dont_know;
1752
1753 return double_int_to_tree (unsigned_type_node, nit);
1754 }
1755
1756 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1757 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1758 *OVERLAPS_B are initialized to the functions that describe the
1759 relation between the elements accessed twice by CHREC_A and
1760 CHREC_B. For k >= 0, the following property is verified:
1761
1762 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1763
1764 static void
1765 analyze_siv_subscript_cst_affine (tree chrec_a,
1766 tree chrec_b,
1767 conflict_function **overlaps_a,
1768 conflict_function **overlaps_b,
1769 tree *last_conflicts)
1770 {
1771 bool value0, value1, value2;
1772 tree type, difference, tmp;
1773
1774 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1775 chrec_a = chrec_convert (type, chrec_a, NULL);
1776 chrec_b = chrec_convert (type, chrec_b, NULL);
1777 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1778
1779 if (!chrec_is_positive (initial_condition (difference), &value0))
1780 {
1781 if (dump_file && (dump_flags & TDF_DETAILS))
1782 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1783
1784 dependence_stats.num_siv_unimplemented++;
1785 *overlaps_a = conflict_fn_not_known ();
1786 *overlaps_b = conflict_fn_not_known ();
1787 *last_conflicts = chrec_dont_know;
1788 return;
1789 }
1790 else
1791 {
1792 if (value0 == false)
1793 {
1794 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1795 {
1796 if (dump_file && (dump_flags & TDF_DETAILS))
1797 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1798
1799 *overlaps_a = conflict_fn_not_known ();
1800 *overlaps_b = conflict_fn_not_known ();
1801 *last_conflicts = chrec_dont_know;
1802 dependence_stats.num_siv_unimplemented++;
1803 return;
1804 }
1805 else
1806 {
1807 if (value1 == true)
1808 {
1809 /* Example:
1810 chrec_a = 12
1811 chrec_b = {10, +, 1}
1812 */
1813
1814 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1815 {
1816 HOST_WIDE_INT numiter;
1817 struct loop *loop = get_chrec_loop (chrec_b);
1818
1819 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1820 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1821 fold_build1 (ABS_EXPR, type, difference),
1822 CHREC_RIGHT (chrec_b));
1823 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1824 *last_conflicts = integer_one_node;
1825
1826
1827 /* Perform weak-zero siv test to see if overlap is
1828 outside the loop bounds. */
1829 numiter = max_stmt_executions_int (loop, true);
1830
1831 if (numiter >= 0
1832 && compare_tree_int (tmp, numiter) > 0)
1833 {
1834 free_conflict_function (*overlaps_a);
1835 free_conflict_function (*overlaps_b);
1836 *overlaps_a = conflict_fn_no_dependence ();
1837 *overlaps_b = conflict_fn_no_dependence ();
1838 *last_conflicts = integer_zero_node;
1839 dependence_stats.num_siv_independent++;
1840 return;
1841 }
1842 dependence_stats.num_siv_dependent++;
1843 return;
1844 }
1845
1846 /* When the step does not divide the difference, there are
1847 no overlaps. */
1848 else
1849 {
1850 *overlaps_a = conflict_fn_no_dependence ();
1851 *overlaps_b = conflict_fn_no_dependence ();
1852 *last_conflicts = integer_zero_node;
1853 dependence_stats.num_siv_independent++;
1854 return;
1855 }
1856 }
1857
1858 else
1859 {
1860 /* Example:
1861 chrec_a = 12
1862 chrec_b = {10, +, -1}
1863
1864 In this case, chrec_a will not overlap with chrec_b. */
1865 *overlaps_a = conflict_fn_no_dependence ();
1866 *overlaps_b = conflict_fn_no_dependence ();
1867 *last_conflicts = integer_zero_node;
1868 dependence_stats.num_siv_independent++;
1869 return;
1870 }
1871 }
1872 }
1873 else
1874 {
1875 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1876 {
1877 if (dump_file && (dump_flags & TDF_DETAILS))
1878 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1879
1880 *overlaps_a = conflict_fn_not_known ();
1881 *overlaps_b = conflict_fn_not_known ();
1882 *last_conflicts = chrec_dont_know;
1883 dependence_stats.num_siv_unimplemented++;
1884 return;
1885 }
1886 else
1887 {
1888 if (value2 == false)
1889 {
1890 /* Example:
1891 chrec_a = 3
1892 chrec_b = {10, +, -1}
1893 */
1894 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1895 {
1896 HOST_WIDE_INT numiter;
1897 struct loop *loop = get_chrec_loop (chrec_b);
1898
1899 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1900 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1901 CHREC_RIGHT (chrec_b));
1902 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1903 *last_conflicts = integer_one_node;
1904
1905 /* Perform weak-zero siv test to see if overlap is
1906 outside the loop bounds. */
1907 numiter = max_stmt_executions_int (loop, true);
1908
1909 if (numiter >= 0
1910 && compare_tree_int (tmp, numiter) > 0)
1911 {
1912 free_conflict_function (*overlaps_a);
1913 free_conflict_function (*overlaps_b);
1914 *overlaps_a = conflict_fn_no_dependence ();
1915 *overlaps_b = conflict_fn_no_dependence ();
1916 *last_conflicts = integer_zero_node;
1917 dependence_stats.num_siv_independent++;
1918 return;
1919 }
1920 dependence_stats.num_siv_dependent++;
1921 return;
1922 }
1923
1924 /* When the step does not divide the difference, there
1925 are no overlaps. */
1926 else
1927 {
1928 *overlaps_a = conflict_fn_no_dependence ();
1929 *overlaps_b = conflict_fn_no_dependence ();
1930 *last_conflicts = integer_zero_node;
1931 dependence_stats.num_siv_independent++;
1932 return;
1933 }
1934 }
1935 else
1936 {
1937 /* Example:
1938 chrec_a = 3
1939 chrec_b = {4, +, 1}
1940
1941 In this case, chrec_a will not overlap with chrec_b. */
1942 *overlaps_a = conflict_fn_no_dependence ();
1943 *overlaps_b = conflict_fn_no_dependence ();
1944 *last_conflicts = integer_zero_node;
1945 dependence_stats.num_siv_independent++;
1946 return;
1947 }
1948 }
1949 }
1950 }
1951 }
1952
1953 /* Helper recursive function for initializing the matrix A. Returns
1954 the initial value of CHREC. */
1955
1956 static tree
1957 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1958 {
1959 gcc_assert (chrec);
1960
1961 switch (TREE_CODE (chrec))
1962 {
1963 case POLYNOMIAL_CHREC:
1964 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1965
1966 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1967 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1968
1969 case PLUS_EXPR:
1970 case MULT_EXPR:
1971 case MINUS_EXPR:
1972 {
1973 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1974 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1975
1976 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1977 }
1978
1979 case NOP_EXPR:
1980 {
1981 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1982 return chrec_convert (chrec_type (chrec), op, NULL);
1983 }
1984
1985 case BIT_NOT_EXPR:
1986 {
1987 /* Handle ~X as -1 - X. */
1988 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1989 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1990 build_int_cst (TREE_TYPE (chrec), -1), op);
1991 }
1992
1993 case INTEGER_CST:
1994 return chrec;
1995
1996 default:
1997 gcc_unreachable ();
1998 return NULL_TREE;
1999 }
2000 }
2001
2002 #define FLOOR_DIV(x,y) ((x) / (y))
2003
2004 /* Solves the special case of the Diophantine equation:
2005 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2006
2007 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2008 number of iterations that loops X and Y run. The overlaps will be
2009 constructed as evolutions in dimension DIM. */
2010
2011 static void
2012 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2013 affine_fn *overlaps_a,
2014 affine_fn *overlaps_b,
2015 tree *last_conflicts, int dim)
2016 {
2017 if (((step_a > 0 && step_b > 0)
2018 || (step_a < 0 && step_b < 0)))
2019 {
2020 int step_overlaps_a, step_overlaps_b;
2021 int gcd_steps_a_b, last_conflict, tau2;
2022
2023 gcd_steps_a_b = gcd (step_a, step_b);
2024 step_overlaps_a = step_b / gcd_steps_a_b;
2025 step_overlaps_b = step_a / gcd_steps_a_b;
2026
2027 if (niter > 0)
2028 {
2029 tau2 = FLOOR_DIV (niter, step_overlaps_a);
2030 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2031 last_conflict = tau2;
2032 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2033 }
2034 else
2035 *last_conflicts = chrec_dont_know;
2036
2037 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2038 build_int_cst (NULL_TREE,
2039 step_overlaps_a));
2040 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2041 build_int_cst (NULL_TREE,
2042 step_overlaps_b));
2043 }
2044
2045 else
2046 {
2047 *overlaps_a = affine_fn_cst (integer_zero_node);
2048 *overlaps_b = affine_fn_cst (integer_zero_node);
2049 *last_conflicts = integer_zero_node;
2050 }
2051 }
2052
2053 /* Solves the special case of a Diophantine equation where CHREC_A is
2054 an affine bivariate function, and CHREC_B is an affine univariate
2055 function. For example,
2056
2057 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2058
2059 has the following overlapping functions:
2060
2061 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2062 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2063 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2064
2065 FORNOW: This is a specialized implementation for a case occurring in
2066 a common benchmark. Implement the general algorithm. */
2067
2068 static void
2069 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2070 conflict_function **overlaps_a,
2071 conflict_function **overlaps_b,
2072 tree *last_conflicts)
2073 {
2074 bool xz_p, yz_p, xyz_p;
2075 int step_x, step_y, step_z;
2076 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2077 affine_fn overlaps_a_xz, overlaps_b_xz;
2078 affine_fn overlaps_a_yz, overlaps_b_yz;
2079 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2080 affine_fn ova1, ova2, ovb;
2081 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2082
2083 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2084 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2085 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2086
2087 niter_x =
2088 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
2089 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2090 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2091
2092 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2093 {
2094 if (dump_file && (dump_flags & TDF_DETAILS))
2095 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2096
2097 *overlaps_a = conflict_fn_not_known ();
2098 *overlaps_b = conflict_fn_not_known ();
2099 *last_conflicts = chrec_dont_know;
2100 return;
2101 }
2102
2103 niter = MIN (niter_x, niter_z);
2104 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2105 &overlaps_a_xz,
2106 &overlaps_b_xz,
2107 &last_conflicts_xz, 1);
2108 niter = MIN (niter_y, niter_z);
2109 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2110 &overlaps_a_yz,
2111 &overlaps_b_yz,
2112 &last_conflicts_yz, 2);
2113 niter = MIN (niter_x, niter_z);
2114 niter = MIN (niter_y, niter);
2115 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2116 &overlaps_a_xyz,
2117 &overlaps_b_xyz,
2118 &last_conflicts_xyz, 3);
2119
2120 xz_p = !integer_zerop (last_conflicts_xz);
2121 yz_p = !integer_zerop (last_conflicts_yz);
2122 xyz_p = !integer_zerop (last_conflicts_xyz);
2123
2124 if (xz_p || yz_p || xyz_p)
2125 {
2126 ova1 = affine_fn_cst (integer_zero_node);
2127 ova2 = affine_fn_cst (integer_zero_node);
2128 ovb = affine_fn_cst (integer_zero_node);
2129 if (xz_p)
2130 {
2131 affine_fn t0 = ova1;
2132 affine_fn t2 = ovb;
2133
2134 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2135 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2136 affine_fn_free (t0);
2137 affine_fn_free (t2);
2138 *last_conflicts = last_conflicts_xz;
2139 }
2140 if (yz_p)
2141 {
2142 affine_fn t0 = ova2;
2143 affine_fn t2 = ovb;
2144
2145 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2146 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2147 affine_fn_free (t0);
2148 affine_fn_free (t2);
2149 *last_conflicts = last_conflicts_yz;
2150 }
2151 if (xyz_p)
2152 {
2153 affine_fn t0 = ova1;
2154 affine_fn t2 = ova2;
2155 affine_fn t4 = ovb;
2156
2157 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2158 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2159 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2160 affine_fn_free (t0);
2161 affine_fn_free (t2);
2162 affine_fn_free (t4);
2163 *last_conflicts = last_conflicts_xyz;
2164 }
2165 *overlaps_a = conflict_fn (2, ova1, ova2);
2166 *overlaps_b = conflict_fn (1, ovb);
2167 }
2168 else
2169 {
2170 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2171 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2172 *last_conflicts = integer_zero_node;
2173 }
2174
2175 affine_fn_free (overlaps_a_xz);
2176 affine_fn_free (overlaps_b_xz);
2177 affine_fn_free (overlaps_a_yz);
2178 affine_fn_free (overlaps_b_yz);
2179 affine_fn_free (overlaps_a_xyz);
2180 affine_fn_free (overlaps_b_xyz);
2181 }
2182
2183 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2184
2185 static void
2186 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2187 int size)
2188 {
2189 memcpy (vec2, vec1, size * sizeof (*vec1));
2190 }
2191
2192 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2193
2194 static void
2195 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2196 int m, int n)
2197 {
2198 int i;
2199
2200 for (i = 0; i < m; i++)
2201 lambda_vector_copy (mat1[i], mat2[i], n);
2202 }
2203
2204 /* Store the N x N identity matrix in MAT. */
2205
2206 static void
2207 lambda_matrix_id (lambda_matrix mat, int size)
2208 {
2209 int i, j;
2210
2211 for (i = 0; i < size; i++)
2212 for (j = 0; j < size; j++)
2213 mat[i][j] = (i == j) ? 1 : 0;
2214 }
2215
2216 /* Return the first nonzero element of vector VEC1 between START and N.
2217 We must have START <= N. Returns N if VEC1 is the zero vector. */
2218
2219 static int
2220 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2221 {
2222 int j = start;
2223 while (j < n && vec1[j] == 0)
2224 j++;
2225 return j;
2226 }
2227
2228 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2229 R2 = R2 + CONST1 * R1. */
2230
2231 static void
2232 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2233 {
2234 int i;
2235
2236 if (const1 == 0)
2237 return;
2238
2239 for (i = 0; i < n; i++)
2240 mat[r2][i] += const1 * mat[r1][i];
2241 }
2242
2243 /* Swap rows R1 and R2 in matrix MAT. */
2244
2245 static void
2246 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2247 {
2248 lambda_vector row;
2249
2250 row = mat[r1];
2251 mat[r1] = mat[r2];
2252 mat[r2] = row;
2253 }
2254
2255 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2256 and store the result in VEC2. */
2257
2258 static void
2259 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2260 int size, int const1)
2261 {
2262 int i;
2263
2264 if (const1 == 0)
2265 lambda_vector_clear (vec2, size);
2266 else
2267 for (i = 0; i < size; i++)
2268 vec2[i] = const1 * vec1[i];
2269 }
2270
2271 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2272
2273 static void
2274 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2275 int size)
2276 {
2277 lambda_vector_mult_const (vec1, vec2, size, -1);
2278 }
2279
2280 /* Negate row R1 of matrix MAT which has N columns. */
2281
2282 static void
2283 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2284 {
2285 lambda_vector_negate (mat[r1], mat[r1], n);
2286 }
2287
2288 /* Return true if two vectors are equal. */
2289
2290 static bool
2291 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2292 {
2293 int i;
2294 for (i = 0; i < size; i++)
2295 if (vec1[i] != vec2[i])
2296 return false;
2297 return true;
2298 }
2299
2300 /* Given an M x N integer matrix A, this function determines an M x
2301 M unimodular matrix U, and an M x N echelon matrix S such that
2302 "U.A = S". This decomposition is also known as "right Hermite".
2303
2304 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2305 Restructuring Compilers" Utpal Banerjee. */
2306
2307 static void
2308 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2309 lambda_matrix S, lambda_matrix U)
2310 {
2311 int i, j, i0 = 0;
2312
2313 lambda_matrix_copy (A, S, m, n);
2314 lambda_matrix_id (U, m);
2315
2316 for (j = 0; j < n; j++)
2317 {
2318 if (lambda_vector_first_nz (S[j], m, i0) < m)
2319 {
2320 ++i0;
2321 for (i = m - 1; i >= i0; i--)
2322 {
2323 while (S[i][j] != 0)
2324 {
2325 int sigma, factor, a, b;
2326
2327 a = S[i-1][j];
2328 b = S[i][j];
2329 sigma = (a * b < 0) ? -1: 1;
2330 a = abs (a);
2331 b = abs (b);
2332 factor = sigma * (a / b);
2333
2334 lambda_matrix_row_add (S, n, i, i-1, -factor);
2335 lambda_matrix_row_exchange (S, i, i-1);
2336
2337 lambda_matrix_row_add (U, m, i, i-1, -factor);
2338 lambda_matrix_row_exchange (U, i, i-1);
2339 }
2340 }
2341 }
2342 }
2343 }
2344
2345 /* Determines the overlapping elements due to accesses CHREC_A and
2346 CHREC_B, that are affine functions. This function cannot handle
2347 symbolic evolution functions, ie. when initial conditions are
2348 parameters, because it uses lambda matrices of integers. */
2349
2350 static void
2351 analyze_subscript_affine_affine (tree chrec_a,
2352 tree chrec_b,
2353 conflict_function **overlaps_a,
2354 conflict_function **overlaps_b,
2355 tree *last_conflicts)
2356 {
2357 unsigned nb_vars_a, nb_vars_b, dim;
2358 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2359 lambda_matrix A, U, S;
2360 struct obstack scratch_obstack;
2361
2362 if (eq_evolutions_p (chrec_a, chrec_b))
2363 {
2364 /* The accessed index overlaps for each iteration in the
2365 loop. */
2366 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2367 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2368 *last_conflicts = chrec_dont_know;
2369 return;
2370 }
2371 if (dump_file && (dump_flags & TDF_DETAILS))
2372 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2373
2374 /* For determining the initial intersection, we have to solve a
2375 Diophantine equation. This is the most time consuming part.
2376
2377 For answering to the question: "Is there a dependence?" we have
2378 to prove that there exists a solution to the Diophantine
2379 equation, and that the solution is in the iteration domain,
2380 i.e. the solution is positive or zero, and that the solution
2381 happens before the upper bound loop.nb_iterations. Otherwise
2382 there is no dependence. This function outputs a description of
2383 the iterations that hold the intersections. */
2384
2385 nb_vars_a = nb_vars_in_chrec (chrec_a);
2386 nb_vars_b = nb_vars_in_chrec (chrec_b);
2387
2388 gcc_obstack_init (&scratch_obstack);
2389
2390 dim = nb_vars_a + nb_vars_b;
2391 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2392 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2393 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2394
2395 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2396 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2397 gamma = init_b - init_a;
2398
2399 /* Don't do all the hard work of solving the Diophantine equation
2400 when we already know the solution: for example,
2401 | {3, +, 1}_1
2402 | {3, +, 4}_2
2403 | gamma = 3 - 3 = 0.
2404 Then the first overlap occurs during the first iterations:
2405 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2406 */
2407 if (gamma == 0)
2408 {
2409 if (nb_vars_a == 1 && nb_vars_b == 1)
2410 {
2411 HOST_WIDE_INT step_a, step_b;
2412 HOST_WIDE_INT niter, niter_a, niter_b;
2413 affine_fn ova, ovb;
2414
2415 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2416 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2417 niter = MIN (niter_a, niter_b);
2418 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2419 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2420
2421 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2422 &ova, &ovb,
2423 last_conflicts, 1);
2424 *overlaps_a = conflict_fn (1, ova);
2425 *overlaps_b = conflict_fn (1, ovb);
2426 }
2427
2428 else if (nb_vars_a == 2 && nb_vars_b == 1)
2429 compute_overlap_steps_for_affine_1_2
2430 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2431
2432 else if (nb_vars_a == 1 && nb_vars_b == 2)
2433 compute_overlap_steps_for_affine_1_2
2434 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2435
2436 else
2437 {
2438 if (dump_file && (dump_flags & TDF_DETAILS))
2439 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2440 *overlaps_a = conflict_fn_not_known ();
2441 *overlaps_b = conflict_fn_not_known ();
2442 *last_conflicts = chrec_dont_know;
2443 }
2444 goto end_analyze_subs_aa;
2445 }
2446
2447 /* U.A = S */
2448 lambda_matrix_right_hermite (A, dim, 1, S, U);
2449
2450 if (S[0][0] < 0)
2451 {
2452 S[0][0] *= -1;
2453 lambda_matrix_row_negate (U, dim, 0);
2454 }
2455 gcd_alpha_beta = S[0][0];
2456
2457 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2458 but that is a quite strange case. Instead of ICEing, answer
2459 don't know. */
2460 if (gcd_alpha_beta == 0)
2461 {
2462 *overlaps_a = conflict_fn_not_known ();
2463 *overlaps_b = conflict_fn_not_known ();
2464 *last_conflicts = chrec_dont_know;
2465 goto end_analyze_subs_aa;
2466 }
2467
2468 /* The classic "gcd-test". */
2469 if (!int_divides_p (gcd_alpha_beta, gamma))
2470 {
2471 /* The "gcd-test" has determined that there is no integer
2472 solution, i.e. there is no dependence. */
2473 *overlaps_a = conflict_fn_no_dependence ();
2474 *overlaps_b = conflict_fn_no_dependence ();
2475 *last_conflicts = integer_zero_node;
2476 }
2477
2478 /* Both access functions are univariate. This includes SIV and MIV cases. */
2479 else if (nb_vars_a == 1 && nb_vars_b == 1)
2480 {
2481 /* Both functions should have the same evolution sign. */
2482 if (((A[0][0] > 0 && -A[1][0] > 0)
2483 || (A[0][0] < 0 && -A[1][0] < 0)))
2484 {
2485 /* The solutions are given by:
2486 |
2487 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2488 | [u21 u22] [y0]
2489
2490 For a given integer t. Using the following variables,
2491
2492 | i0 = u11 * gamma / gcd_alpha_beta
2493 | j0 = u12 * gamma / gcd_alpha_beta
2494 | i1 = u21
2495 | j1 = u22
2496
2497 the solutions are:
2498
2499 | x0 = i0 + i1 * t,
2500 | y0 = j0 + j1 * t. */
2501 HOST_WIDE_INT i0, j0, i1, j1;
2502
2503 i0 = U[0][0] * gamma / gcd_alpha_beta;
2504 j0 = U[0][1] * gamma / gcd_alpha_beta;
2505 i1 = U[1][0];
2506 j1 = U[1][1];
2507
2508 if ((i1 == 0 && i0 < 0)
2509 || (j1 == 0 && j0 < 0))
2510 {
2511 /* There is no solution.
2512 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2513 falls in here, but for the moment we don't look at the
2514 upper bound of the iteration domain. */
2515 *overlaps_a = conflict_fn_no_dependence ();
2516 *overlaps_b = conflict_fn_no_dependence ();
2517 *last_conflicts = integer_zero_node;
2518 goto end_analyze_subs_aa;
2519 }
2520
2521 if (i1 > 0 && j1 > 0)
2522 {
2523 HOST_WIDE_INT niter_a = max_stmt_executions_int
2524 (get_chrec_loop (chrec_a), true);
2525 HOST_WIDE_INT niter_b = max_stmt_executions_int
2526 (get_chrec_loop (chrec_b), true);
2527 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2528
2529 /* (X0, Y0) is a solution of the Diophantine equation:
2530 "chrec_a (X0) = chrec_b (Y0)". */
2531 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2532 CEIL (-j0, j1));
2533 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2534 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2535
2536 /* (X1, Y1) is the smallest positive solution of the eq
2537 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2538 first conflict occurs. */
2539 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2540 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2541 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2542
2543 if (niter > 0)
2544 {
2545 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2546 FLOOR_DIV (niter - j0, j1));
2547 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2548
2549 /* If the overlap occurs outside of the bounds of the
2550 loop, there is no dependence. */
2551 if (x1 >= niter || y1 >= niter)
2552 {
2553 *overlaps_a = conflict_fn_no_dependence ();
2554 *overlaps_b = conflict_fn_no_dependence ();
2555 *last_conflicts = integer_zero_node;
2556 goto end_analyze_subs_aa;
2557 }
2558 else
2559 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2560 }
2561 else
2562 *last_conflicts = chrec_dont_know;
2563
2564 *overlaps_a
2565 = conflict_fn (1,
2566 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2567 1,
2568 build_int_cst (NULL_TREE, i1)));
2569 *overlaps_b
2570 = conflict_fn (1,
2571 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2572 1,
2573 build_int_cst (NULL_TREE, j1)));
2574 }
2575 else
2576 {
2577 /* FIXME: For the moment, the upper bound of the
2578 iteration domain for i and j is not checked. */
2579 if (dump_file && (dump_flags & TDF_DETAILS))
2580 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2581 *overlaps_a = conflict_fn_not_known ();
2582 *overlaps_b = conflict_fn_not_known ();
2583 *last_conflicts = chrec_dont_know;
2584 }
2585 }
2586 else
2587 {
2588 if (dump_file && (dump_flags & TDF_DETAILS))
2589 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2590 *overlaps_a = conflict_fn_not_known ();
2591 *overlaps_b = conflict_fn_not_known ();
2592 *last_conflicts = chrec_dont_know;
2593 }
2594 }
2595 else
2596 {
2597 if (dump_file && (dump_flags & TDF_DETAILS))
2598 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2599 *overlaps_a = conflict_fn_not_known ();
2600 *overlaps_b = conflict_fn_not_known ();
2601 *last_conflicts = chrec_dont_know;
2602 }
2603
2604 end_analyze_subs_aa:
2605 obstack_free (&scratch_obstack, NULL);
2606 if (dump_file && (dump_flags & TDF_DETAILS))
2607 {
2608 fprintf (dump_file, " (overlaps_a = ");
2609 dump_conflict_function (dump_file, *overlaps_a);
2610 fprintf (dump_file, ")\n (overlaps_b = ");
2611 dump_conflict_function (dump_file, *overlaps_b);
2612 fprintf (dump_file, ")\n");
2613 fprintf (dump_file, ")\n");
2614 }
2615 }
2616
2617 /* Returns true when analyze_subscript_affine_affine can be used for
2618 determining the dependence relation between chrec_a and chrec_b,
2619 that contain symbols. This function modifies chrec_a and chrec_b
2620 such that the analysis result is the same, and such that they don't
2621 contain symbols, and then can safely be passed to the analyzer.
2622
2623 Example: The analysis of the following tuples of evolutions produce
2624 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2625 vs. {0, +, 1}_1
2626
2627 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2628 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2629 */
2630
2631 static bool
2632 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2633 {
2634 tree diff, type, left_a, left_b, right_b;
2635
2636 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2637 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2638 /* FIXME: For the moment not handled. Might be refined later. */
2639 return false;
2640
2641 type = chrec_type (*chrec_a);
2642 left_a = CHREC_LEFT (*chrec_a);
2643 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2644 diff = chrec_fold_minus (type, left_a, left_b);
2645
2646 if (!evolution_function_is_constant_p (diff))
2647 return false;
2648
2649 if (dump_file && (dump_flags & TDF_DETAILS))
2650 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2651
2652 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2653 diff, CHREC_RIGHT (*chrec_a));
2654 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2655 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2656 build_int_cst (type, 0),
2657 right_b);
2658 return true;
2659 }
2660
2661 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2662 *OVERLAPS_B are initialized to the functions that describe the
2663 relation between the elements accessed twice by CHREC_A and
2664 CHREC_B. For k >= 0, the following property is verified:
2665
2666 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2667
2668 static void
2669 analyze_siv_subscript (tree chrec_a,
2670 tree chrec_b,
2671 conflict_function **overlaps_a,
2672 conflict_function **overlaps_b,
2673 tree *last_conflicts,
2674 int loop_nest_num)
2675 {
2676 dependence_stats.num_siv++;
2677
2678 if (dump_file && (dump_flags & TDF_DETAILS))
2679 fprintf (dump_file, "(analyze_siv_subscript \n");
2680
2681 if (evolution_function_is_constant_p (chrec_a)
2682 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2683 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2684 overlaps_a, overlaps_b, last_conflicts);
2685
2686 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2687 && evolution_function_is_constant_p (chrec_b))
2688 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2689 overlaps_b, overlaps_a, last_conflicts);
2690
2691 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2692 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2693 {
2694 if (!chrec_contains_symbols (chrec_a)
2695 && !chrec_contains_symbols (chrec_b))
2696 {
2697 analyze_subscript_affine_affine (chrec_a, chrec_b,
2698 overlaps_a, overlaps_b,
2699 last_conflicts);
2700
2701 if (CF_NOT_KNOWN_P (*overlaps_a)
2702 || CF_NOT_KNOWN_P (*overlaps_b))
2703 dependence_stats.num_siv_unimplemented++;
2704 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2705 || CF_NO_DEPENDENCE_P (*overlaps_b))
2706 dependence_stats.num_siv_independent++;
2707 else
2708 dependence_stats.num_siv_dependent++;
2709 }
2710 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2711 &chrec_b))
2712 {
2713 analyze_subscript_affine_affine (chrec_a, chrec_b,
2714 overlaps_a, overlaps_b,
2715 last_conflicts);
2716
2717 if (CF_NOT_KNOWN_P (*overlaps_a)
2718 || CF_NOT_KNOWN_P (*overlaps_b))
2719 dependence_stats.num_siv_unimplemented++;
2720 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2721 || CF_NO_DEPENDENCE_P (*overlaps_b))
2722 dependence_stats.num_siv_independent++;
2723 else
2724 dependence_stats.num_siv_dependent++;
2725 }
2726 else
2727 goto siv_subscript_dontknow;
2728 }
2729
2730 else
2731 {
2732 siv_subscript_dontknow:;
2733 if (dump_file && (dump_flags & TDF_DETAILS))
2734 fprintf (dump_file, "siv test failed: unimplemented.\n");
2735 *overlaps_a = conflict_fn_not_known ();
2736 *overlaps_b = conflict_fn_not_known ();
2737 *last_conflicts = chrec_dont_know;
2738 dependence_stats.num_siv_unimplemented++;
2739 }
2740
2741 if (dump_file && (dump_flags & TDF_DETAILS))
2742 fprintf (dump_file, ")\n");
2743 }
2744
2745 /* Returns false if we can prove that the greatest common divisor of the steps
2746 of CHREC does not divide CST, false otherwise. */
2747
2748 static bool
2749 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2750 {
2751 HOST_WIDE_INT cd = 0, val;
2752 tree step;
2753
2754 if (!host_integerp (cst, 0))
2755 return true;
2756 val = tree_low_cst (cst, 0);
2757
2758 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2759 {
2760 step = CHREC_RIGHT (chrec);
2761 if (!host_integerp (step, 0))
2762 return true;
2763 cd = gcd (cd, tree_low_cst (step, 0));
2764 chrec = CHREC_LEFT (chrec);
2765 }
2766
2767 return val % cd == 0;
2768 }
2769
2770 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2771 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2772 functions that describe the relation between the elements accessed
2773 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2774 is verified:
2775
2776 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2777
2778 static void
2779 analyze_miv_subscript (tree chrec_a,
2780 tree chrec_b,
2781 conflict_function **overlaps_a,
2782 conflict_function **overlaps_b,
2783 tree *last_conflicts,
2784 struct loop *loop_nest)
2785 {
2786 tree type, difference;
2787
2788 dependence_stats.num_miv++;
2789 if (dump_file && (dump_flags & TDF_DETAILS))
2790 fprintf (dump_file, "(analyze_miv_subscript \n");
2791
2792 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2793 chrec_a = chrec_convert (type, chrec_a, NULL);
2794 chrec_b = chrec_convert (type, chrec_b, NULL);
2795 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2796
2797 if (eq_evolutions_p (chrec_a, chrec_b))
2798 {
2799 /* Access functions are the same: all the elements are accessed
2800 in the same order. */
2801 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2802 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2803 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2804 dependence_stats.num_miv_dependent++;
2805 }
2806
2807 else if (evolution_function_is_constant_p (difference)
2808 /* For the moment, the following is verified:
2809 evolution_function_is_affine_multivariate_p (chrec_a,
2810 loop_nest->num) */
2811 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2812 {
2813 /* testsuite/.../ssa-chrec-33.c
2814 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2815
2816 The difference is 1, and all the evolution steps are multiples
2817 of 2, consequently there are no overlapping elements. */
2818 *overlaps_a = conflict_fn_no_dependence ();
2819 *overlaps_b = conflict_fn_no_dependence ();
2820 *last_conflicts = integer_zero_node;
2821 dependence_stats.num_miv_independent++;
2822 }
2823
2824 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2825 && !chrec_contains_symbols (chrec_a)
2826 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2827 && !chrec_contains_symbols (chrec_b))
2828 {
2829 /* testsuite/.../ssa-chrec-35.c
2830 {0, +, 1}_2 vs. {0, +, 1}_3
2831 the overlapping elements are respectively located at iterations:
2832 {0, +, 1}_x and {0, +, 1}_x,
2833 in other words, we have the equality:
2834 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2835
2836 Other examples:
2837 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2838 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2839
2840 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2841 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2842 */
2843 analyze_subscript_affine_affine (chrec_a, chrec_b,
2844 overlaps_a, overlaps_b, last_conflicts);
2845
2846 if (CF_NOT_KNOWN_P (*overlaps_a)
2847 || CF_NOT_KNOWN_P (*overlaps_b))
2848 dependence_stats.num_miv_unimplemented++;
2849 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2850 || CF_NO_DEPENDENCE_P (*overlaps_b))
2851 dependence_stats.num_miv_independent++;
2852 else
2853 dependence_stats.num_miv_dependent++;
2854 }
2855
2856 else
2857 {
2858 /* When the analysis is too difficult, answer "don't know". */
2859 if (dump_file && (dump_flags & TDF_DETAILS))
2860 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2861
2862 *overlaps_a = conflict_fn_not_known ();
2863 *overlaps_b = conflict_fn_not_known ();
2864 *last_conflicts = chrec_dont_know;
2865 dependence_stats.num_miv_unimplemented++;
2866 }
2867
2868 if (dump_file && (dump_flags & TDF_DETAILS))
2869 fprintf (dump_file, ")\n");
2870 }
2871
2872 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2873 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2874 OVERLAP_ITERATIONS_B are initialized with two functions that
2875 describe the iterations that contain conflicting elements.
2876
2877 Remark: For an integer k >= 0, the following equality is true:
2878
2879 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2880 */
2881
2882 static void
2883 analyze_overlapping_iterations (tree chrec_a,
2884 tree chrec_b,
2885 conflict_function **overlap_iterations_a,
2886 conflict_function **overlap_iterations_b,
2887 tree *last_conflicts, struct loop *loop_nest)
2888 {
2889 unsigned int lnn = loop_nest->num;
2890
2891 dependence_stats.num_subscript_tests++;
2892
2893 if (dump_file && (dump_flags & TDF_DETAILS))
2894 {
2895 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2896 fprintf (dump_file, " (chrec_a = ");
2897 print_generic_expr (dump_file, chrec_a, 0);
2898 fprintf (dump_file, ")\n (chrec_b = ");
2899 print_generic_expr (dump_file, chrec_b, 0);
2900 fprintf (dump_file, ")\n");
2901 }
2902
2903 if (chrec_a == NULL_TREE
2904 || chrec_b == NULL_TREE
2905 || chrec_contains_undetermined (chrec_a)
2906 || chrec_contains_undetermined (chrec_b))
2907 {
2908 dependence_stats.num_subscript_undetermined++;
2909
2910 *overlap_iterations_a = conflict_fn_not_known ();
2911 *overlap_iterations_b = conflict_fn_not_known ();
2912 }
2913
2914 /* If they are the same chrec, and are affine, they overlap
2915 on every iteration. */
2916 else if (eq_evolutions_p (chrec_a, chrec_b)
2917 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2918 || operand_equal_p (chrec_a, chrec_b, 0)))
2919 {
2920 dependence_stats.num_same_subscript_function++;
2921 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2922 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2923 *last_conflicts = chrec_dont_know;
2924 }
2925
2926 /* If they aren't the same, and aren't affine, we can't do anything
2927 yet. */
2928 else if ((chrec_contains_symbols (chrec_a)
2929 || chrec_contains_symbols (chrec_b))
2930 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2931 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2932 {
2933 dependence_stats.num_subscript_undetermined++;
2934 *overlap_iterations_a = conflict_fn_not_known ();
2935 *overlap_iterations_b = conflict_fn_not_known ();
2936 }
2937
2938 else if (ziv_subscript_p (chrec_a, chrec_b))
2939 analyze_ziv_subscript (chrec_a, chrec_b,
2940 overlap_iterations_a, overlap_iterations_b,
2941 last_conflicts);
2942
2943 else if (siv_subscript_p (chrec_a, chrec_b))
2944 analyze_siv_subscript (chrec_a, chrec_b,
2945 overlap_iterations_a, overlap_iterations_b,
2946 last_conflicts, lnn);
2947
2948 else
2949 analyze_miv_subscript (chrec_a, chrec_b,
2950 overlap_iterations_a, overlap_iterations_b,
2951 last_conflicts, loop_nest);
2952
2953 if (dump_file && (dump_flags & TDF_DETAILS))
2954 {
2955 fprintf (dump_file, " (overlap_iterations_a = ");
2956 dump_conflict_function (dump_file, *overlap_iterations_a);
2957 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2958 dump_conflict_function (dump_file, *overlap_iterations_b);
2959 fprintf (dump_file, ")\n");
2960 fprintf (dump_file, ")\n");
2961 }
2962 }
2963
2964 /* Helper function for uniquely inserting distance vectors. */
2965
2966 static void
2967 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2968 {
2969 unsigned i;
2970 lambda_vector v;
2971
2972 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2973 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2974 return;
2975
2976 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2977 }
2978
2979 /* Helper function for uniquely inserting direction vectors. */
2980
2981 static void
2982 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2983 {
2984 unsigned i;
2985 lambda_vector v;
2986
2987 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2988 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2989 return;
2990
2991 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2992 }
2993
2994 /* Add a distance of 1 on all the loops outer than INDEX. If we
2995 haven't yet determined a distance for this outer loop, push a new
2996 distance vector composed of the previous distance, and a distance
2997 of 1 for this outer loop. Example:
2998
2999 | loop_1
3000 | loop_2
3001 | A[10]
3002 | endloop_2
3003 | endloop_1
3004
3005 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3006 save (0, 1), then we have to save (1, 0). */
3007
3008 static void
3009 add_outer_distances (struct data_dependence_relation *ddr,
3010 lambda_vector dist_v, int index)
3011 {
3012 /* For each outer loop where init_v is not set, the accesses are
3013 in dependence of distance 1 in the loop. */
3014 while (--index >= 0)
3015 {
3016 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3017 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3018 save_v[index] = 1;
3019 save_dist_v (ddr, save_v);
3020 }
3021 }
3022
3023 /* Return false when fail to represent the data dependence as a
3024 distance vector. INIT_B is set to true when a component has been
3025 added to the distance vector DIST_V. INDEX_CARRY is then set to
3026 the index in DIST_V that carries the dependence. */
3027
3028 static bool
3029 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3030 struct data_reference *ddr_a,
3031 struct data_reference *ddr_b,
3032 lambda_vector dist_v, bool *init_b,
3033 int *index_carry)
3034 {
3035 unsigned i;
3036 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3037
3038 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3039 {
3040 tree access_fn_a, access_fn_b;
3041 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3042
3043 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3044 {
3045 non_affine_dependence_relation (ddr);
3046 return false;
3047 }
3048
3049 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3050 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3051
3052 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3053 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3054 {
3055 int dist, index;
3056 int var_a = CHREC_VARIABLE (access_fn_a);
3057 int var_b = CHREC_VARIABLE (access_fn_b);
3058
3059 if (var_a != var_b
3060 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3061 {
3062 non_affine_dependence_relation (ddr);
3063 return false;
3064 }
3065
3066 dist = int_cst_value (SUB_DISTANCE (subscript));
3067 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3068 *index_carry = MIN (index, *index_carry);
3069
3070 /* This is the subscript coupling test. If we have already
3071 recorded a distance for this loop (a distance coming from
3072 another subscript), it should be the same. For example,
3073 in the following code, there is no dependence:
3074
3075 | loop i = 0, N, 1
3076 | T[i+1][i] = ...
3077 | ... = T[i][i]
3078 | endloop
3079 */
3080 if (init_v[index] != 0 && dist_v[index] != dist)
3081 {
3082 finalize_ddr_dependent (ddr, chrec_known);
3083 return false;
3084 }
3085
3086 dist_v[index] = dist;
3087 init_v[index] = 1;
3088 *init_b = true;
3089 }
3090 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3091 {
3092 /* This can be for example an affine vs. constant dependence
3093 (T[i] vs. T[3]) that is not an affine dependence and is
3094 not representable as a distance vector. */
3095 non_affine_dependence_relation (ddr);
3096 return false;
3097 }
3098 }
3099
3100 return true;
3101 }
3102
3103 /* Return true when the DDR contains only constant access functions. */
3104
3105 static bool
3106 constant_access_functions (const struct data_dependence_relation *ddr)
3107 {
3108 unsigned i;
3109
3110 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3111 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3112 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3113 return false;
3114
3115 return true;
3116 }
3117
3118 /* Helper function for the case where DDR_A and DDR_B are the same
3119 multivariate access function with a constant step. For an example
3120 see pr34635-1.c. */
3121
3122 static void
3123 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3124 {
3125 int x_1, x_2;
3126 tree c_1 = CHREC_LEFT (c_2);
3127 tree c_0 = CHREC_LEFT (c_1);
3128 lambda_vector dist_v;
3129 int v1, v2, cd;
3130
3131 /* Polynomials with more than 2 variables are not handled yet. When
3132 the evolution steps are parameters, it is not possible to
3133 represent the dependence using classical distance vectors. */
3134 if (TREE_CODE (c_0) != INTEGER_CST
3135 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3136 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3137 {
3138 DDR_AFFINE_P (ddr) = false;
3139 return;
3140 }
3141
3142 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3143 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3144
3145 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3146 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3147 v1 = int_cst_value (CHREC_RIGHT (c_1));
3148 v2 = int_cst_value (CHREC_RIGHT (c_2));
3149 cd = gcd (v1, v2);
3150 v1 /= cd;
3151 v2 /= cd;
3152
3153 if (v2 < 0)
3154 {
3155 v2 = -v2;
3156 v1 = -v1;
3157 }
3158
3159 dist_v[x_1] = v2;
3160 dist_v[x_2] = -v1;
3161 save_dist_v (ddr, dist_v);
3162
3163 add_outer_distances (ddr, dist_v, x_1);
3164 }
3165
3166 /* Helper function for the case where DDR_A and DDR_B are the same
3167 access functions. */
3168
3169 static void
3170 add_other_self_distances (struct data_dependence_relation *ddr)
3171 {
3172 lambda_vector dist_v;
3173 unsigned i;
3174 int index_carry = DDR_NB_LOOPS (ddr);
3175
3176 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3177 {
3178 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3179
3180 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3181 {
3182 if (!evolution_function_is_univariate_p (access_fun))
3183 {
3184 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3185 {
3186 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3187 return;
3188 }
3189
3190 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3191
3192 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3193 add_multivariate_self_dist (ddr, access_fun);
3194 else
3195 /* The evolution step is not constant: it varies in
3196 the outer loop, so this cannot be represented by a
3197 distance vector. For example in pr34635.c the
3198 evolution is {0, +, {0, +, 4}_1}_2. */
3199 DDR_AFFINE_P (ddr) = false;
3200
3201 return;
3202 }
3203
3204 index_carry = MIN (index_carry,
3205 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3206 DDR_LOOP_NEST (ddr)));
3207 }
3208 }
3209
3210 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3211 add_outer_distances (ddr, dist_v, index_carry);
3212 }
3213
3214 static void
3215 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3216 {
3217 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3218
3219 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3220 save_dist_v (ddr, dist_v);
3221 }
3222
3223 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3224 is the case for example when access functions are the same and
3225 equal to a constant, as in:
3226
3227 | loop_1
3228 | A[3] = ...
3229 | ... = A[3]
3230 | endloop_1
3231
3232 in which case the distance vectors are (0) and (1). */
3233
3234 static void
3235 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3236 {
3237 unsigned i, j;
3238
3239 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3240 {
3241 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3242 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3243 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3244
3245 for (j = 0; j < ca->n; j++)
3246 if (affine_function_zero_p (ca->fns[j]))
3247 {
3248 insert_innermost_unit_dist_vector (ddr);
3249 return;
3250 }
3251
3252 for (j = 0; j < cb->n; j++)
3253 if (affine_function_zero_p (cb->fns[j]))
3254 {
3255 insert_innermost_unit_dist_vector (ddr);
3256 return;
3257 }
3258 }
3259 }
3260
3261 /* Compute the classic per loop distance vector. DDR is the data
3262 dependence relation to build a vector from. Return false when fail
3263 to represent the data dependence as a distance vector. */
3264
3265 static bool
3266 build_classic_dist_vector (struct data_dependence_relation *ddr,
3267 struct loop *loop_nest)
3268 {
3269 bool init_b = false;
3270 int index_carry = DDR_NB_LOOPS (ddr);
3271 lambda_vector dist_v;
3272
3273 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3274 return false;
3275
3276 if (same_access_functions (ddr))
3277 {
3278 /* Save the 0 vector. */
3279 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3280 save_dist_v (ddr, dist_v);
3281
3282 if (constant_access_functions (ddr))
3283 add_distance_for_zero_overlaps (ddr);
3284
3285 if (DDR_NB_LOOPS (ddr) > 1)
3286 add_other_self_distances (ddr);
3287
3288 return true;
3289 }
3290
3291 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3292 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3293 dist_v, &init_b, &index_carry))
3294 return false;
3295
3296 /* Save the distance vector if we initialized one. */
3297 if (init_b)
3298 {
3299 /* Verify a basic constraint: classic distance vectors should
3300 always be lexicographically positive.
3301
3302 Data references are collected in the order of execution of
3303 the program, thus for the following loop
3304
3305 | for (i = 1; i < 100; i++)
3306 | for (j = 1; j < 100; j++)
3307 | {
3308 | t = T[j+1][i-1]; // A
3309 | T[j][i] = t + 2; // B
3310 | }
3311
3312 references are collected following the direction of the wind:
3313 A then B. The data dependence tests are performed also
3314 following this order, such that we're looking at the distance
3315 separating the elements accessed by A from the elements later
3316 accessed by B. But in this example, the distance returned by
3317 test_dep (A, B) is lexicographically negative (-1, 1), that
3318 means that the access A occurs later than B with respect to
3319 the outer loop, ie. we're actually looking upwind. In this
3320 case we solve test_dep (B, A) looking downwind to the
3321 lexicographically positive solution, that returns the
3322 distance vector (1, -1). */
3323 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3324 {
3325 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3326 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3327 loop_nest))
3328 return false;
3329 compute_subscript_distance (ddr);
3330 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3331 save_v, &init_b, &index_carry))
3332 return false;
3333 save_dist_v (ddr, save_v);
3334 DDR_REVERSED_P (ddr) = true;
3335
3336 /* In this case there is a dependence forward for all the
3337 outer loops:
3338
3339 | for (k = 1; k < 100; k++)
3340 | for (i = 1; i < 100; i++)
3341 | for (j = 1; j < 100; j++)
3342 | {
3343 | t = T[j+1][i-1]; // A
3344 | T[j][i] = t + 2; // B
3345 | }
3346
3347 the vectors are:
3348 (0, 1, -1)
3349 (1, 1, -1)
3350 (1, -1, 1)
3351 */
3352 if (DDR_NB_LOOPS (ddr) > 1)
3353 {
3354 add_outer_distances (ddr, save_v, index_carry);
3355 add_outer_distances (ddr, dist_v, index_carry);
3356 }
3357 }
3358 else
3359 {
3360 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3361 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3362
3363 if (DDR_NB_LOOPS (ddr) > 1)
3364 {
3365 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3366
3367 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3368 DDR_A (ddr), loop_nest))
3369 return false;
3370 compute_subscript_distance (ddr);
3371 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3372 opposite_v, &init_b,
3373 &index_carry))
3374 return false;
3375
3376 save_dist_v (ddr, save_v);
3377 add_outer_distances (ddr, dist_v, index_carry);
3378 add_outer_distances (ddr, opposite_v, index_carry);
3379 }
3380 else
3381 save_dist_v (ddr, save_v);
3382 }
3383 }
3384 else
3385 {
3386 /* There is a distance of 1 on all the outer loops: Example:
3387 there is a dependence of distance 1 on loop_1 for the array A.
3388
3389 | loop_1
3390 | A[5] = ...
3391 | endloop
3392 */
3393 add_outer_distances (ddr, dist_v,
3394 lambda_vector_first_nz (dist_v,
3395 DDR_NB_LOOPS (ddr), 0));
3396 }
3397
3398 if (dump_file && (dump_flags & TDF_DETAILS))
3399 {
3400 unsigned i;
3401
3402 fprintf (dump_file, "(build_classic_dist_vector\n");
3403 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3404 {
3405 fprintf (dump_file, " dist_vector = (");
3406 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3407 DDR_NB_LOOPS (ddr));
3408 fprintf (dump_file, " )\n");
3409 }
3410 fprintf (dump_file, ")\n");
3411 }
3412
3413 return true;
3414 }
3415
3416 /* Return the direction for a given distance.
3417 FIXME: Computing dir this way is suboptimal, since dir can catch
3418 cases that dist is unable to represent. */
3419
3420 static inline enum data_dependence_direction
3421 dir_from_dist (int dist)
3422 {
3423 if (dist > 0)
3424 return dir_positive;
3425 else if (dist < 0)
3426 return dir_negative;
3427 else
3428 return dir_equal;
3429 }
3430
3431 /* Compute the classic per loop direction vector. DDR is the data
3432 dependence relation to build a vector from. */
3433
3434 static void
3435 build_classic_dir_vector (struct data_dependence_relation *ddr)
3436 {
3437 unsigned i, j;
3438 lambda_vector dist_v;
3439
3440 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3441 {
3442 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3443
3444 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3445 dir_v[j] = dir_from_dist (dist_v[j]);
3446
3447 save_dir_v (ddr, dir_v);
3448 }
3449 }
3450
3451 /* Helper function. Returns true when there is a dependence between
3452 data references DRA and DRB. */
3453
3454 static bool
3455 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3456 struct data_reference *dra,
3457 struct data_reference *drb,
3458 struct loop *loop_nest)
3459 {
3460 unsigned int i;
3461 tree last_conflicts;
3462 struct subscript *subscript;
3463 tree res = NULL_TREE;
3464
3465 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3466 i++)
3467 {
3468 conflict_function *overlaps_a, *overlaps_b;
3469
3470 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3471 DR_ACCESS_FN (drb, i),
3472 &overlaps_a, &overlaps_b,
3473 &last_conflicts, loop_nest);
3474
3475 if (SUB_CONFLICTS_IN_A (subscript))
3476 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3477 if (SUB_CONFLICTS_IN_B (subscript))
3478 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3479
3480 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3481 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3482 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3483
3484 /* If there is any undetermined conflict function we have to
3485 give a conservative answer in case we cannot prove that
3486 no dependence exists when analyzing another subscript. */
3487 if (CF_NOT_KNOWN_P (overlaps_a)
3488 || CF_NOT_KNOWN_P (overlaps_b))
3489 {
3490 res = chrec_dont_know;
3491 continue;
3492 }
3493
3494 /* When there is a subscript with no dependence we can stop. */
3495 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3496 || CF_NO_DEPENDENCE_P (overlaps_b))
3497 {
3498 res = chrec_known;
3499 break;
3500 }
3501 }
3502
3503 if (res == NULL_TREE)
3504 return true;
3505
3506 if (res == chrec_known)
3507 dependence_stats.num_dependence_independent++;
3508 else
3509 dependence_stats.num_dependence_undetermined++;
3510 finalize_ddr_dependent (ddr, res);
3511 return false;
3512 }
3513
3514 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3515
3516 static void
3517 subscript_dependence_tester (struct data_dependence_relation *ddr,
3518 struct loop *loop_nest)
3519 {
3520
3521 if (dump_file && (dump_flags & TDF_DETAILS))
3522 fprintf (dump_file, "(subscript_dependence_tester \n");
3523
3524 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3525 dependence_stats.num_dependence_dependent++;
3526
3527 compute_subscript_distance (ddr);
3528 if (build_classic_dist_vector (ddr, loop_nest))
3529 build_classic_dir_vector (ddr);
3530
3531 if (dump_file && (dump_flags & TDF_DETAILS))
3532 fprintf (dump_file, ")\n");
3533 }
3534
3535 /* Returns true when all the access functions of A are affine or
3536 constant with respect to LOOP_NEST. */
3537
3538 static bool
3539 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3540 const struct loop *loop_nest)
3541 {
3542 unsigned int i;
3543 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3544 tree t;
3545
3546 FOR_EACH_VEC_ELT (tree, fns, i, t)
3547 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3548 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3549 return false;
3550
3551 return true;
3552 }
3553
3554 /* Initializes an equation for an OMEGA problem using the information
3555 contained in the ACCESS_FUN. Returns true when the operation
3556 succeeded.
3557
3558 PB is the omega constraint system.
3559 EQ is the number of the equation to be initialized.
3560 OFFSET is used for shifting the variables names in the constraints:
3561 a constrain is composed of 2 * the number of variables surrounding
3562 dependence accesses. OFFSET is set either to 0 for the first n variables,
3563 then it is set to n.
3564 ACCESS_FUN is expected to be an affine chrec. */
3565
3566 static bool
3567 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3568 unsigned int offset, tree access_fun,
3569 struct data_dependence_relation *ddr)
3570 {
3571 switch (TREE_CODE (access_fun))
3572 {
3573 case POLYNOMIAL_CHREC:
3574 {
3575 tree left = CHREC_LEFT (access_fun);
3576 tree right = CHREC_RIGHT (access_fun);
3577 int var = CHREC_VARIABLE (access_fun);
3578 unsigned var_idx;
3579
3580 if (TREE_CODE (right) != INTEGER_CST)
3581 return false;
3582
3583 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3584 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3585
3586 /* Compute the innermost loop index. */
3587 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3588
3589 if (offset == 0)
3590 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3591 += int_cst_value (right);
3592
3593 switch (TREE_CODE (left))
3594 {
3595 case POLYNOMIAL_CHREC:
3596 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3597
3598 case INTEGER_CST:
3599 pb->eqs[eq].coef[0] += int_cst_value (left);
3600 return true;
3601
3602 default:
3603 return false;
3604 }
3605 }
3606
3607 case INTEGER_CST:
3608 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3609 return true;
3610
3611 default:
3612 return false;
3613 }
3614 }
3615
3616 /* As explained in the comments preceding init_omega_for_ddr, we have
3617 to set up a system for each loop level, setting outer loops
3618 variation to zero, and current loop variation to positive or zero.
3619 Save each lexico positive distance vector. */
3620
3621 static void
3622 omega_extract_distance_vectors (omega_pb pb,
3623 struct data_dependence_relation *ddr)
3624 {
3625 int eq, geq;
3626 unsigned i, j;
3627 struct loop *loopi, *loopj;
3628 enum omega_result res;
3629
3630 /* Set a new problem for each loop in the nest. The basis is the
3631 problem that we have initialized until now. On top of this we
3632 add new constraints. */
3633 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3634 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3635 {
3636 int dist = 0;
3637 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3638 DDR_NB_LOOPS (ddr));
3639
3640 omega_copy_problem (copy, pb);
3641
3642 /* For all the outer loops "loop_j", add "dj = 0". */
3643 for (j = 0;
3644 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3645 {
3646 eq = omega_add_zero_eq (copy, omega_black);
3647 copy->eqs[eq].coef[j + 1] = 1;
3648 }
3649
3650 /* For "loop_i", add "0 <= di". */
3651 geq = omega_add_zero_geq (copy, omega_black);
3652 copy->geqs[geq].coef[i + 1] = 1;
3653
3654 /* Reduce the constraint system, and test that the current
3655 problem is feasible. */
3656 res = omega_simplify_problem (copy);
3657 if (res == omega_false
3658 || res == omega_unknown
3659 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3660 goto next_problem;
3661
3662 for (eq = 0; eq < copy->num_subs; eq++)
3663 if (copy->subs[eq].key == (int) i + 1)
3664 {
3665 dist = copy->subs[eq].coef[0];
3666 goto found_dist;
3667 }
3668
3669 if (dist == 0)
3670 {
3671 /* Reinitialize problem... */
3672 omega_copy_problem (copy, pb);
3673 for (j = 0;
3674 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3675 {
3676 eq = omega_add_zero_eq (copy, omega_black);
3677 copy->eqs[eq].coef[j + 1] = 1;
3678 }
3679
3680 /* ..., but this time "di = 1". */
3681 eq = omega_add_zero_eq (copy, omega_black);
3682 copy->eqs[eq].coef[i + 1] = 1;
3683 copy->eqs[eq].coef[0] = -1;
3684
3685 res = omega_simplify_problem (copy);
3686 if (res == omega_false
3687 || res == omega_unknown
3688 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3689 goto next_problem;
3690
3691 for (eq = 0; eq < copy->num_subs; eq++)
3692 if (copy->subs[eq].key == (int) i + 1)
3693 {
3694 dist = copy->subs[eq].coef[0];
3695 goto found_dist;
3696 }
3697 }
3698
3699 found_dist:;
3700 /* Save the lexicographically positive distance vector. */
3701 if (dist >= 0)
3702 {
3703 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3704 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3705
3706 dist_v[i] = dist;
3707
3708 for (eq = 0; eq < copy->num_subs; eq++)
3709 if (copy->subs[eq].key > 0)
3710 {
3711 dist = copy->subs[eq].coef[0];
3712 dist_v[copy->subs[eq].key - 1] = dist;
3713 }
3714
3715 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3716 dir_v[j] = dir_from_dist (dist_v[j]);
3717
3718 save_dist_v (ddr, dist_v);
3719 save_dir_v (ddr, dir_v);
3720 }
3721
3722 next_problem:;
3723 omega_free_problem (copy);
3724 }
3725 }
3726
3727 /* This is called for each subscript of a tuple of data references:
3728 insert an equality for representing the conflicts. */
3729
3730 static bool
3731 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3732 struct data_dependence_relation *ddr,
3733 omega_pb pb, bool *maybe_dependent)
3734 {
3735 int eq;
3736 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3737 TREE_TYPE (access_fun_b));
3738 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3739 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3740 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3741 tree minus_one;
3742
3743 /* When the fun_a - fun_b is not constant, the dependence is not
3744 captured by the classic distance vector representation. */
3745 if (TREE_CODE (difference) != INTEGER_CST)
3746 return false;
3747
3748 /* ZIV test. */
3749 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3750 {
3751 /* There is no dependence. */
3752 *maybe_dependent = false;
3753 return true;
3754 }
3755
3756 minus_one = build_int_cst (type, -1);
3757 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3758
3759 eq = omega_add_zero_eq (pb, omega_black);
3760 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3761 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3762 /* There is probably a dependence, but the system of
3763 constraints cannot be built: answer "don't know". */
3764 return false;
3765
3766 /* GCD test. */
3767 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3768 && !int_divides_p (lambda_vector_gcd
3769 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3770 2 * DDR_NB_LOOPS (ddr)),
3771 pb->eqs[eq].coef[0]))
3772 {
3773 /* There is no dependence. */
3774 *maybe_dependent = false;
3775 return true;
3776 }
3777
3778 return true;
3779 }
3780
3781 /* Helper function, same as init_omega_for_ddr but specialized for
3782 data references A and B. */
3783
3784 static bool
3785 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3786 struct data_dependence_relation *ddr,
3787 omega_pb pb, bool *maybe_dependent)
3788 {
3789 unsigned i;
3790 int ineq;
3791 struct loop *loopi;
3792 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3793
3794 /* Insert an equality per subscript. */
3795 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3796 {
3797 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3798 ddr, pb, maybe_dependent))
3799 return false;
3800 else if (*maybe_dependent == false)
3801 {
3802 /* There is no dependence. */
3803 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3804 return true;
3805 }
3806 }
3807
3808 /* Insert inequalities: constraints corresponding to the iteration
3809 domain, i.e. the loops surrounding the references "loop_x" and
3810 the distance variables "dx". The layout of the OMEGA
3811 representation is as follows:
3812 - coef[0] is the constant
3813 - coef[1..nb_loops] are the protected variables that will not be
3814 removed by the solver: the "dx"
3815 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3816 */
3817 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3818 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3819 {
3820 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3821
3822 /* 0 <= loop_x */
3823 ineq = omega_add_zero_geq (pb, omega_black);
3824 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3825
3826 /* 0 <= loop_x + dx */
3827 ineq = omega_add_zero_geq (pb, omega_black);
3828 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3829 pb->geqs[ineq].coef[i + 1] = 1;
3830
3831 if (nbi != -1)
3832 {
3833 /* loop_x <= nb_iters */
3834 ineq = omega_add_zero_geq (pb, omega_black);
3835 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3836 pb->geqs[ineq].coef[0] = nbi;
3837
3838 /* loop_x + dx <= nb_iters */
3839 ineq = omega_add_zero_geq (pb, omega_black);
3840 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3841 pb->geqs[ineq].coef[i + 1] = -1;
3842 pb->geqs[ineq].coef[0] = nbi;
3843
3844 /* A step "dx" bigger than nb_iters is not feasible, so
3845 add "0 <= nb_iters + dx", */
3846 ineq = omega_add_zero_geq (pb, omega_black);
3847 pb->geqs[ineq].coef[i + 1] = 1;
3848 pb->geqs[ineq].coef[0] = nbi;
3849 /* and "dx <= nb_iters". */
3850 ineq = omega_add_zero_geq (pb, omega_black);
3851 pb->geqs[ineq].coef[i + 1] = -1;
3852 pb->geqs[ineq].coef[0] = nbi;
3853 }
3854 }
3855
3856 omega_extract_distance_vectors (pb, ddr);
3857
3858 return true;
3859 }
3860
3861 /* Sets up the Omega dependence problem for the data dependence
3862 relation DDR. Returns false when the constraint system cannot be
3863 built, ie. when the test answers "don't know". Returns true
3864 otherwise, and when independence has been proved (using one of the
3865 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3866 set MAYBE_DEPENDENT to true.
3867
3868 Example: for setting up the dependence system corresponding to the
3869 conflicting accesses
3870
3871 | loop_i
3872 | loop_j
3873 | A[i, i+1] = ...
3874 | ... A[2*j, 2*(i + j)]
3875 | endloop_j
3876 | endloop_i
3877
3878 the following constraints come from the iteration domain:
3879
3880 0 <= i <= Ni
3881 0 <= i + di <= Ni
3882 0 <= j <= Nj
3883 0 <= j + dj <= Nj
3884
3885 where di, dj are the distance variables. The constraints
3886 representing the conflicting elements are:
3887
3888 i = 2 * (j + dj)
3889 i + 1 = 2 * (i + di + j + dj)
3890
3891 For asking that the resulting distance vector (di, dj) be
3892 lexicographically positive, we insert the constraint "di >= 0". If
3893 "di = 0" in the solution, we fix that component to zero, and we
3894 look at the inner loops: we set a new problem where all the outer
3895 loop distances are zero, and fix this inner component to be
3896 positive. When one of the components is positive, we save that
3897 distance, and set a new problem where the distance on this loop is
3898 zero, searching for other distances in the inner loops. Here is
3899 the classic example that illustrates that we have to set for each
3900 inner loop a new problem:
3901
3902 | loop_1
3903 | loop_2
3904 | A[10]
3905 | endloop_2
3906 | endloop_1
3907
3908 we have to save two distances (1, 0) and (0, 1).
3909
3910 Given two array references, refA and refB, we have to set the
3911 dependence problem twice, refA vs. refB and refB vs. refA, and we
3912 cannot do a single test, as refB might occur before refA in the
3913 inner loops, and the contrary when considering outer loops: ex.
3914
3915 | loop_0
3916 | loop_1
3917 | loop_2
3918 | T[{1,+,1}_2][{1,+,1}_1] // refA
3919 | T[{2,+,1}_2][{0,+,1}_1] // refB
3920 | endloop_2
3921 | endloop_1
3922 | endloop_0
3923
3924 refB touches the elements in T before refA, and thus for the same
3925 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3926 but for successive loop_0 iterations, we have (1, -1, 1)
3927
3928 The Omega solver expects the distance variables ("di" in the
3929 previous example) to come first in the constraint system (as
3930 variables to be protected, or "safe" variables), the constraint
3931 system is built using the following layout:
3932
3933 "cst | distance vars | index vars".
3934 */
3935
3936 static bool
3937 init_omega_for_ddr (struct data_dependence_relation *ddr,
3938 bool *maybe_dependent)
3939 {
3940 omega_pb pb;
3941 bool res = false;
3942
3943 *maybe_dependent = true;
3944
3945 if (same_access_functions (ddr))
3946 {
3947 unsigned j;
3948 lambda_vector dir_v;
3949
3950 /* Save the 0 vector. */
3951 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3952 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3953 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3954 dir_v[j] = dir_equal;
3955 save_dir_v (ddr, dir_v);
3956
3957 /* Save the dependences carried by outer loops. */
3958 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3959 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3960 maybe_dependent);
3961 omega_free_problem (pb);
3962 return res;
3963 }
3964
3965 /* Omega expects the protected variables (those that have to be kept
3966 after elimination) to appear first in the constraint system.
3967 These variables are the distance variables. In the following
3968 initialization we declare NB_LOOPS safe variables, and the total
3969 number of variables for the constraint system is 2*NB_LOOPS. */
3970 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3971 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3972 maybe_dependent);
3973 omega_free_problem (pb);
3974
3975 /* Stop computation if not decidable, or no dependence. */
3976 if (res == false || *maybe_dependent == false)
3977 return res;
3978
3979 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3980 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3981 maybe_dependent);
3982 omega_free_problem (pb);
3983
3984 return res;
3985 }
3986
3987 /* Return true when DDR contains the same information as that stored
3988 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3989
3990 static bool
3991 ddr_consistent_p (FILE *file,
3992 struct data_dependence_relation *ddr,
3993 VEC (lambda_vector, heap) *dist_vects,
3994 VEC (lambda_vector, heap) *dir_vects)
3995 {
3996 unsigned int i, j;
3997
3998 /* If dump_file is set, output there. */
3999 if (dump_file && (dump_flags & TDF_DETAILS))
4000 file = dump_file;
4001
4002 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
4003 {
4004 lambda_vector b_dist_v;
4005 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4006 VEC_length (lambda_vector, dist_vects),
4007 DDR_NUM_DIST_VECTS (ddr));
4008
4009 fprintf (file, "Banerjee dist vectors:\n");
4010 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
4011 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4012
4013 fprintf (file, "Omega dist vectors:\n");
4014 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4015 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4016
4017 fprintf (file, "data dependence relation:\n");
4018 dump_data_dependence_relation (file, ddr);
4019
4020 fprintf (file, ")\n");
4021 return false;
4022 }
4023
4024 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
4025 {
4026 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4027 VEC_length (lambda_vector, dir_vects),
4028 DDR_NUM_DIR_VECTS (ddr));
4029 return false;
4030 }
4031
4032 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4033 {
4034 lambda_vector a_dist_v;
4035 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4036
4037 /* Distance vectors are not ordered in the same way in the DDR
4038 and in the DIST_VECTS: search for a matching vector. */
4039 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
4040 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4041 break;
4042
4043 if (j == VEC_length (lambda_vector, dist_vects))
4044 {
4045 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4046 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4047 fprintf (file, "not found in Omega dist vectors:\n");
4048 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4049 fprintf (file, "data dependence relation:\n");
4050 dump_data_dependence_relation (file, ddr);
4051 fprintf (file, ")\n");
4052 }
4053 }
4054
4055 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4056 {
4057 lambda_vector a_dir_v;
4058 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4059
4060 /* Direction vectors are not ordered in the same way in the DDR
4061 and in the DIR_VECTS: search for a matching vector. */
4062 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
4063 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4064 break;
4065
4066 if (j == VEC_length (lambda_vector, dist_vects))
4067 {
4068 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4069 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4070 fprintf (file, "not found in Omega dir vectors:\n");
4071 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4072 fprintf (file, "data dependence relation:\n");
4073 dump_data_dependence_relation (file, ddr);
4074 fprintf (file, ")\n");
4075 }
4076 }
4077
4078 return true;
4079 }
4080
4081 /* This computes the affine dependence relation between A and B with
4082 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4083 independence between two accesses, while CHREC_DONT_KNOW is used
4084 for representing the unknown relation.
4085
4086 Note that it is possible to stop the computation of the dependence
4087 relation the first time we detect a CHREC_KNOWN element for a given
4088 subscript. */
4089
4090 static void
4091 compute_affine_dependence (struct data_dependence_relation *ddr,
4092 struct loop *loop_nest)
4093 {
4094 struct data_reference *dra = DDR_A (ddr);
4095 struct data_reference *drb = DDR_B (ddr);
4096
4097 if (dump_file && (dump_flags & TDF_DETAILS))
4098 {
4099 fprintf (dump_file, "(compute_affine_dependence\n");
4100 fprintf (dump_file, " stmt_a: ");
4101 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4102 fprintf (dump_file, " stmt_b: ");
4103 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4104 }
4105
4106 /* Analyze only when the dependence relation is not yet known. */
4107 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4108 {
4109 dependence_stats.num_dependence_tests++;
4110
4111 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4112 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4113 {
4114 if (flag_check_data_deps)
4115 {
4116 /* Compute the dependences using the first algorithm. */
4117 subscript_dependence_tester (ddr, loop_nest);
4118
4119 if (dump_file && (dump_flags & TDF_DETAILS))
4120 {
4121 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4122 dump_data_dependence_relation (dump_file, ddr);
4123 }
4124
4125 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4126 {
4127 bool maybe_dependent;
4128 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4129
4130 /* Save the result of the first DD analyzer. */
4131 dist_vects = DDR_DIST_VECTS (ddr);
4132 dir_vects = DDR_DIR_VECTS (ddr);
4133
4134 /* Reset the information. */
4135 DDR_DIST_VECTS (ddr) = NULL;
4136 DDR_DIR_VECTS (ddr) = NULL;
4137
4138 /* Compute the same information using Omega. */
4139 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4140 goto csys_dont_know;
4141
4142 if (dump_file && (dump_flags & TDF_DETAILS))
4143 {
4144 fprintf (dump_file, "Omega Analyzer\n");
4145 dump_data_dependence_relation (dump_file, ddr);
4146 }
4147
4148 /* Check that we get the same information. */
4149 if (maybe_dependent)
4150 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4151 dir_vects));
4152 }
4153 }
4154 else
4155 subscript_dependence_tester (ddr, loop_nest);
4156 }
4157
4158 /* As a last case, if the dependence cannot be determined, or if
4159 the dependence is considered too difficult to determine, answer
4160 "don't know". */
4161 else
4162 {
4163 csys_dont_know:;
4164 dependence_stats.num_dependence_undetermined++;
4165
4166 if (dump_file && (dump_flags & TDF_DETAILS))
4167 {
4168 fprintf (dump_file, "Data ref a:\n");
4169 dump_data_reference (dump_file, dra);
4170 fprintf (dump_file, "Data ref b:\n");
4171 dump_data_reference (dump_file, drb);
4172 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4173 }
4174 finalize_ddr_dependent (ddr, chrec_dont_know);
4175 }
4176 }
4177
4178 if (dump_file && (dump_flags & TDF_DETAILS))
4179 {
4180 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4181 fprintf (dump_file, ") -> no dependence\n");
4182 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4183 fprintf (dump_file, ") -> dependence analysis failed\n");
4184 else
4185 fprintf (dump_file, ")\n");
4186 }
4187 }
4188
4189 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4190 the data references in DATAREFS, in the LOOP_NEST. When
4191 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4192 relations. Return true when successful, i.e. data references number
4193 is small enough to be handled. */
4194
4195 bool
4196 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4197 VEC (ddr_p, heap) **dependence_relations,
4198 VEC (loop_p, heap) *loop_nest,
4199 bool compute_self_and_rr)
4200 {
4201 struct data_dependence_relation *ddr;
4202 struct data_reference *a, *b;
4203 unsigned int i, j;
4204
4205 if ((int) VEC_length (data_reference_p, datarefs)
4206 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4207 {
4208 struct data_dependence_relation *ddr;
4209
4210 /* Insert a single relation into dependence_relations:
4211 chrec_dont_know. */
4212 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4213 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4214 return false;
4215 }
4216
4217 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4218 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4219 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4220 {
4221 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4222 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4223 if (loop_nest)
4224 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4225 }
4226
4227 if (compute_self_and_rr)
4228 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4229 {
4230 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4231 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4232 if (loop_nest)
4233 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4234 }
4235
4236 return true;
4237 }
4238
4239 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4240 true if STMT clobbers memory, false otherwise. */
4241
4242 bool
4243 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4244 {
4245 bool clobbers_memory = false;
4246 data_ref_loc *ref;
4247 tree *op0, *op1;
4248 enum gimple_code stmt_code = gimple_code (stmt);
4249
4250 *references = NULL;
4251
4252 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4253 Calls have side-effects, except those to const or pure
4254 functions. */
4255 if ((stmt_code == GIMPLE_CALL
4256 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4257 || (stmt_code == GIMPLE_ASM
4258 && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt))))
4259 clobbers_memory = true;
4260
4261 if (!gimple_vuse (stmt))
4262 return clobbers_memory;
4263
4264 if (stmt_code == GIMPLE_ASSIGN)
4265 {
4266 tree base;
4267 op0 = gimple_assign_lhs_ptr (stmt);
4268 op1 = gimple_assign_rhs1_ptr (stmt);
4269
4270 if (DECL_P (*op1)
4271 || (REFERENCE_CLASS_P (*op1)
4272 && (base = get_base_address (*op1))
4273 && TREE_CODE (base) != SSA_NAME))
4274 {
4275 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4276 ref->pos = op1;
4277 ref->is_read = true;
4278 }
4279 }
4280 else if (stmt_code == GIMPLE_CALL)
4281 {
4282 unsigned i, n;
4283
4284 op0 = gimple_call_lhs_ptr (stmt);
4285 n = gimple_call_num_args (stmt);
4286 for (i = 0; i < n; i++)
4287 {
4288 op1 = gimple_call_arg_ptr (stmt, i);
4289
4290 if (DECL_P (*op1)
4291 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4292 {
4293 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4294 ref->pos = op1;
4295 ref->is_read = true;
4296 }
4297 }
4298 }
4299 else
4300 return clobbers_memory;
4301
4302 if (*op0
4303 && (DECL_P (*op0)
4304 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4305 {
4306 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4307 ref->pos = op0;
4308 ref->is_read = false;
4309 }
4310 return clobbers_memory;
4311 }
4312
4313 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4314 reference, returns false, otherwise returns true. NEST is the outermost
4315 loop of the loop nest in which the references should be analyzed. */
4316
4317 bool
4318 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4319 VEC (data_reference_p, heap) **datarefs)
4320 {
4321 unsigned i;
4322 VEC (data_ref_loc, heap) *references;
4323 data_ref_loc *ref;
4324 bool ret = true;
4325 data_reference_p dr;
4326
4327 if (get_references_in_stmt (stmt, &references))
4328 {
4329 VEC_free (data_ref_loc, heap, references);
4330 return false;
4331 }
4332
4333 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4334 {
4335 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4336 *ref->pos, stmt, ref->is_read);
4337 gcc_assert (dr != NULL);
4338 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4339 }
4340 VEC_free (data_ref_loc, heap, references);
4341 return ret;
4342 }
4343
4344 /* Stores the data references in STMT to DATAREFS. If there is an
4345 unanalyzable reference, returns false, otherwise returns true.
4346 NEST is the outermost loop of the loop nest in which the references
4347 should be instantiated, LOOP is the loop in which the references
4348 should be analyzed. */
4349
4350 bool
4351 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4352 VEC (data_reference_p, heap) **datarefs)
4353 {
4354 unsigned i;
4355 VEC (data_ref_loc, heap) *references;
4356 data_ref_loc *ref;
4357 bool ret = true;
4358 data_reference_p dr;
4359
4360 if (get_references_in_stmt (stmt, &references))
4361 {
4362 VEC_free (data_ref_loc, heap, references);
4363 return false;
4364 }
4365
4366 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4367 {
4368 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4369 gcc_assert (dr != NULL);
4370 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4371 }
4372
4373 VEC_free (data_ref_loc, heap, references);
4374 return ret;
4375 }
4376
4377 /* Search the data references in LOOP, and record the information into
4378 DATAREFS. Returns chrec_dont_know when failing to analyze a
4379 difficult case, returns NULL_TREE otherwise. */
4380
4381 tree
4382 find_data_references_in_bb (struct loop *loop, basic_block bb,
4383 VEC (data_reference_p, heap) **datarefs)
4384 {
4385 gimple_stmt_iterator bsi;
4386
4387 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4388 {
4389 gimple stmt = gsi_stmt (bsi);
4390
4391 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4392 {
4393 struct data_reference *res;
4394 res = XCNEW (struct data_reference);
4395 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4396
4397 return chrec_dont_know;
4398 }
4399 }
4400
4401 return NULL_TREE;
4402 }
4403
4404 /* Search the data references in LOOP, and record the information into
4405 DATAREFS. Returns chrec_dont_know when failing to analyze a
4406 difficult case, returns NULL_TREE otherwise.
4407
4408 TODO: This function should be made smarter so that it can handle address
4409 arithmetic as if they were array accesses, etc. */
4410
4411 static tree
4412 find_data_references_in_loop (struct loop *loop,
4413 VEC (data_reference_p, heap) **datarefs)
4414 {
4415 basic_block bb, *bbs;
4416 unsigned int i;
4417
4418 bbs = get_loop_body_in_dom_order (loop);
4419
4420 for (i = 0; i < loop->num_nodes; i++)
4421 {
4422 bb = bbs[i];
4423
4424 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4425 {
4426 free (bbs);
4427 return chrec_dont_know;
4428 }
4429 }
4430 free (bbs);
4431
4432 return NULL_TREE;
4433 }
4434
4435 /* Recursive helper function. */
4436
4437 static bool
4438 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4439 {
4440 /* Inner loops of the nest should not contain siblings. Example:
4441 when there are two consecutive loops,
4442
4443 | loop_0
4444 | loop_1
4445 | A[{0, +, 1}_1]
4446 | endloop_1
4447 | loop_2
4448 | A[{0, +, 1}_2]
4449 | endloop_2
4450 | endloop_0
4451
4452 the dependence relation cannot be captured by the distance
4453 abstraction. */
4454 if (loop->next)
4455 return false;
4456
4457 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4458 if (loop->inner)
4459 return find_loop_nest_1 (loop->inner, loop_nest);
4460 return true;
4461 }
4462
4463 /* Return false when the LOOP is not well nested. Otherwise return
4464 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4465 contain the loops from the outermost to the innermost, as they will
4466 appear in the classic distance vector. */
4467
4468 bool
4469 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4470 {
4471 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4472 if (loop->inner)
4473 return find_loop_nest_1 (loop->inner, loop_nest);
4474 return true;
4475 }
4476
4477 /* Returns true when the data dependences have been computed, false otherwise.
4478 Given a loop nest LOOP, the following vectors are returned:
4479 DATAREFS is initialized to all the array elements contained in this loop,
4480 DEPENDENCE_RELATIONS contains the relations between the data references.
4481 Compute read-read and self relations if
4482 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4483
4484 bool
4485 compute_data_dependences_for_loop (struct loop *loop,
4486 bool compute_self_and_read_read_dependences,
4487 VEC (loop_p, heap) **loop_nest,
4488 VEC (data_reference_p, heap) **datarefs,
4489 VEC (ddr_p, heap) **dependence_relations)
4490 {
4491 bool res = true;
4492
4493 memset (&dependence_stats, 0, sizeof (dependence_stats));
4494
4495 /* If the loop nest is not well formed, or one of the data references
4496 is not computable, give up without spending time to compute other
4497 dependences. */
4498 if (!loop
4499 || !find_loop_nest (loop, loop_nest)
4500 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4501 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4502 compute_self_and_read_read_dependences))
4503 res = false;
4504
4505 if (dump_file && (dump_flags & TDF_STATS))
4506 {
4507 fprintf (dump_file, "Dependence tester statistics:\n");
4508
4509 fprintf (dump_file, "Number of dependence tests: %d\n",
4510 dependence_stats.num_dependence_tests);
4511 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4512 dependence_stats.num_dependence_dependent);
4513 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4514 dependence_stats.num_dependence_independent);
4515 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4516 dependence_stats.num_dependence_undetermined);
4517
4518 fprintf (dump_file, "Number of subscript tests: %d\n",
4519 dependence_stats.num_subscript_tests);
4520 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4521 dependence_stats.num_subscript_undetermined);
4522 fprintf (dump_file, "Number of same subscript function: %d\n",
4523 dependence_stats.num_same_subscript_function);
4524
4525 fprintf (dump_file, "Number of ziv tests: %d\n",
4526 dependence_stats.num_ziv);
4527 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4528 dependence_stats.num_ziv_dependent);
4529 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4530 dependence_stats.num_ziv_independent);
4531 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4532 dependence_stats.num_ziv_unimplemented);
4533
4534 fprintf (dump_file, "Number of siv tests: %d\n",
4535 dependence_stats.num_siv);
4536 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4537 dependence_stats.num_siv_dependent);
4538 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4539 dependence_stats.num_siv_independent);
4540 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4541 dependence_stats.num_siv_unimplemented);
4542
4543 fprintf (dump_file, "Number of miv tests: %d\n",
4544 dependence_stats.num_miv);
4545 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4546 dependence_stats.num_miv_dependent);
4547 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4548 dependence_stats.num_miv_independent);
4549 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4550 dependence_stats.num_miv_unimplemented);
4551 }
4552
4553 return res;
4554 }
4555
4556 /* Returns true when the data dependences for the basic block BB have been
4557 computed, false otherwise.
4558 DATAREFS is initialized to all the array elements contained in this basic
4559 block, DEPENDENCE_RELATIONS contains the relations between the data
4560 references. Compute read-read and self relations if
4561 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4562 bool
4563 compute_data_dependences_for_bb (basic_block bb,
4564 bool compute_self_and_read_read_dependences,
4565 VEC (data_reference_p, heap) **datarefs,
4566 VEC (ddr_p, heap) **dependence_relations)
4567 {
4568 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4569 return false;
4570
4571 return compute_all_dependences (*datarefs, dependence_relations, NULL,
4572 compute_self_and_read_read_dependences);
4573 }
4574
4575 /* Entry point (for testing only). Analyze all the data references
4576 and the dependence relations in LOOP.
4577
4578 The data references are computed first.
4579
4580 A relation on these nodes is represented by a complete graph. Some
4581 of the relations could be of no interest, thus the relations can be
4582 computed on demand.
4583
4584 In the following function we compute all the relations. This is
4585 just a first implementation that is here for:
4586 - for showing how to ask for the dependence relations,
4587 - for the debugging the whole dependence graph,
4588 - for the dejagnu testcases and maintenance.
4589
4590 It is possible to ask only for a part of the graph, avoiding to
4591 compute the whole dependence graph. The computed dependences are
4592 stored in a knowledge base (KB) such that later queries don't
4593 recompute the same information. The implementation of this KB is
4594 transparent to the optimizer, and thus the KB can be changed with a
4595 more efficient implementation, or the KB could be disabled. */
4596 static void
4597 analyze_all_data_dependences (struct loop *loop)
4598 {
4599 unsigned int i;
4600 int nb_data_refs = 10;
4601 VEC (data_reference_p, heap) *datarefs =
4602 VEC_alloc (data_reference_p, heap, nb_data_refs);
4603 VEC (ddr_p, heap) *dependence_relations =
4604 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4605 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4606
4607 /* Compute DDs on the whole function. */
4608 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4609 &dependence_relations);
4610
4611 if (dump_file)
4612 {
4613 dump_data_dependence_relations (dump_file, dependence_relations);
4614 fprintf (dump_file, "\n\n");
4615
4616 if (dump_flags & TDF_DETAILS)
4617 dump_dist_dir_vectors (dump_file, dependence_relations);
4618
4619 if (dump_flags & TDF_STATS)
4620 {
4621 unsigned nb_top_relations = 0;
4622 unsigned nb_bot_relations = 0;
4623 unsigned nb_chrec_relations = 0;
4624 struct data_dependence_relation *ddr;
4625
4626 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4627 {
4628 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4629 nb_top_relations++;
4630
4631 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4632 nb_bot_relations++;
4633
4634 else
4635 nb_chrec_relations++;
4636 }
4637
4638 gather_stats_on_scev_database ();
4639 }
4640 }
4641
4642 VEC_free (loop_p, heap, loop_nest);
4643 free_dependence_relations (dependence_relations);
4644 free_data_refs (datarefs);
4645 }
4646
4647 /* Computes all the data dependences and check that the results of
4648 several analyzers are the same. */
4649
4650 void
4651 tree_check_data_deps (void)
4652 {
4653 loop_iterator li;
4654 struct loop *loop_nest;
4655
4656 FOR_EACH_LOOP (li, loop_nest, 0)
4657 analyze_all_data_dependences (loop_nest);
4658 }
4659
4660 /* Free the memory used by a data dependence relation DDR. */
4661
4662 void
4663 free_dependence_relation (struct data_dependence_relation *ddr)
4664 {
4665 if (ddr == NULL)
4666 return;
4667
4668 if (DDR_SUBSCRIPTS (ddr))
4669 free_subscripts (DDR_SUBSCRIPTS (ddr));
4670 if (DDR_DIST_VECTS (ddr))
4671 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4672 if (DDR_DIR_VECTS (ddr))
4673 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4674
4675 free (ddr);
4676 }
4677
4678 /* Free the memory used by the data dependence relations from
4679 DEPENDENCE_RELATIONS. */
4680
4681 void
4682 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4683 {
4684 unsigned int i;
4685 struct data_dependence_relation *ddr;
4686
4687 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4688 if (ddr)
4689 free_dependence_relation (ddr);
4690
4691 VEC_free (ddr_p, heap, dependence_relations);
4692 }
4693
4694 /* Free the memory used by the data references from DATAREFS. */
4695
4696 void
4697 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4698 {
4699 unsigned int i;
4700 struct data_reference *dr;
4701
4702 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4703 free_data_ref (dr);
4704 VEC_free (data_reference_p, heap, datarefs);
4705 }
4706
4707 \f
4708
4709 /* Dump vertex I in RDG to FILE. */
4710
4711 void
4712 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4713 {
4714 struct vertex *v = &(rdg->vertices[i]);
4715 struct graph_edge *e;
4716
4717 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4718 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4719 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4720
4721 if (v->pred)
4722 for (e = v->pred; e; e = e->pred_next)
4723 fprintf (file, " %d", e->src);
4724
4725 fprintf (file, ") (out:");
4726
4727 if (v->succ)
4728 for (e = v->succ; e; e = e->succ_next)
4729 fprintf (file, " %d", e->dest);
4730
4731 fprintf (file, ")\n");
4732 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4733 fprintf (file, ")\n");
4734 }
4735
4736 /* Call dump_rdg_vertex on stderr. */
4737
4738 DEBUG_FUNCTION void
4739 debug_rdg_vertex (struct graph *rdg, int i)
4740 {
4741 dump_rdg_vertex (stderr, rdg, i);
4742 }
4743
4744 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4745 dumped vertices to that bitmap. */
4746
4747 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4748 {
4749 int i;
4750
4751 fprintf (file, "(%d\n", c);
4752
4753 for (i = 0; i < rdg->n_vertices; i++)
4754 if (rdg->vertices[i].component == c)
4755 {
4756 if (dumped)
4757 bitmap_set_bit (dumped, i);
4758
4759 dump_rdg_vertex (file, rdg, i);
4760 }
4761
4762 fprintf (file, ")\n");
4763 }
4764
4765 /* Call dump_rdg_vertex on stderr. */
4766
4767 DEBUG_FUNCTION void
4768 debug_rdg_component (struct graph *rdg, int c)
4769 {
4770 dump_rdg_component (stderr, rdg, c, NULL);
4771 }
4772
4773 /* Dump the reduced dependence graph RDG to FILE. */
4774
4775 void
4776 dump_rdg (FILE *file, struct graph *rdg)
4777 {
4778 int i;
4779 bitmap dumped = BITMAP_ALLOC (NULL);
4780
4781 fprintf (file, "(rdg\n");
4782
4783 for (i = 0; i < rdg->n_vertices; i++)
4784 if (!bitmap_bit_p (dumped, i))
4785 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4786
4787 fprintf (file, ")\n");
4788 BITMAP_FREE (dumped);
4789 }
4790
4791 /* Call dump_rdg on stderr. */
4792
4793 DEBUG_FUNCTION void
4794 debug_rdg (struct graph *rdg)
4795 {
4796 dump_rdg (stderr, rdg);
4797 }
4798
4799 static void
4800 dot_rdg_1 (FILE *file, struct graph *rdg)
4801 {
4802 int i;
4803
4804 fprintf (file, "digraph RDG {\n");
4805
4806 for (i = 0; i < rdg->n_vertices; i++)
4807 {
4808 struct vertex *v = &(rdg->vertices[i]);
4809 struct graph_edge *e;
4810
4811 /* Highlight reads from memory. */
4812 if (RDG_MEM_READS_STMT (rdg, i))
4813 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4814
4815 /* Highlight stores to memory. */
4816 if (RDG_MEM_WRITE_STMT (rdg, i))
4817 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4818
4819 if (v->succ)
4820 for (e = v->succ; e; e = e->succ_next)
4821 switch (RDGE_TYPE (e))
4822 {
4823 case input_dd:
4824 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4825 break;
4826
4827 case output_dd:
4828 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4829 break;
4830
4831 case flow_dd:
4832 /* These are the most common dependences: don't print these. */
4833 fprintf (file, "%d -> %d \n", i, e->dest);
4834 break;
4835
4836 case anti_dd:
4837 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4838 break;
4839
4840 default:
4841 gcc_unreachable ();
4842 }
4843 }
4844
4845 fprintf (file, "}\n\n");
4846 }
4847
4848 /* Display the Reduced Dependence Graph using dotty. */
4849 extern void dot_rdg (struct graph *);
4850
4851 DEBUG_FUNCTION void
4852 dot_rdg (struct graph *rdg)
4853 {
4854 /* When debugging, enable the following code. This cannot be used
4855 in production compilers because it calls "system". */
4856 #if 0
4857 FILE *file = fopen ("/tmp/rdg.dot", "w");
4858 gcc_assert (file != NULL);
4859
4860 dot_rdg_1 (file, rdg);
4861 fclose (file);
4862
4863 system ("dotty /tmp/rdg.dot &");
4864 #else
4865 dot_rdg_1 (stderr, rdg);
4866 #endif
4867 }
4868
4869 /* This structure is used for recording the mapping statement index in
4870 the RDG. */
4871
4872 struct GTY(()) rdg_vertex_info
4873 {
4874 gimple stmt;
4875 int index;
4876 };
4877
4878 /* Returns the index of STMT in RDG. */
4879
4880 int
4881 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4882 {
4883 struct rdg_vertex_info rvi, *slot;
4884
4885 rvi.stmt = stmt;
4886 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4887
4888 if (!slot)
4889 return -1;
4890
4891 return slot->index;
4892 }
4893
4894 /* Creates an edge in RDG for each distance vector from DDR. The
4895 order that we keep track of in the RDG is the order in which
4896 statements have to be executed. */
4897
4898 static void
4899 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4900 {
4901 struct graph_edge *e;
4902 int va, vb;
4903 data_reference_p dra = DDR_A (ddr);
4904 data_reference_p drb = DDR_B (ddr);
4905 unsigned level = ddr_dependence_level (ddr);
4906
4907 /* For non scalar dependences, when the dependence is REVERSED,
4908 statement B has to be executed before statement A. */
4909 if (level > 0
4910 && !DDR_REVERSED_P (ddr))
4911 {
4912 data_reference_p tmp = dra;
4913 dra = drb;
4914 drb = tmp;
4915 }
4916
4917 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4918 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4919
4920 if (va < 0 || vb < 0)
4921 return;
4922
4923 e = add_edge (rdg, va, vb);
4924 e->data = XNEW (struct rdg_edge);
4925
4926 RDGE_LEVEL (e) = level;
4927 RDGE_RELATION (e) = ddr;
4928
4929 /* Determines the type of the data dependence. */
4930 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4931 RDGE_TYPE (e) = input_dd;
4932 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4933 RDGE_TYPE (e) = output_dd;
4934 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4935 RDGE_TYPE (e) = flow_dd;
4936 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4937 RDGE_TYPE (e) = anti_dd;
4938 }
4939
4940 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4941 the index of DEF in RDG. */
4942
4943 static void
4944 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4945 {
4946 use_operand_p imm_use_p;
4947 imm_use_iterator iterator;
4948
4949 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4950 {
4951 struct graph_edge *e;
4952 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4953
4954 if (use < 0)
4955 continue;
4956
4957 e = add_edge (rdg, idef, use);
4958 e->data = XNEW (struct rdg_edge);
4959 RDGE_TYPE (e) = flow_dd;
4960 RDGE_RELATION (e) = NULL;
4961 }
4962 }
4963
4964 /* Creates the edges of the reduced dependence graph RDG. */
4965
4966 static void
4967 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4968 {
4969 int i;
4970 struct data_dependence_relation *ddr;
4971 def_operand_p def_p;
4972 ssa_op_iter iter;
4973
4974 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4975 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4976 create_rdg_edge_for_ddr (rdg, ddr);
4977
4978 for (i = 0; i < rdg->n_vertices; i++)
4979 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4980 iter, SSA_OP_DEF)
4981 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4982 }
4983
4984 /* Build the vertices of the reduced dependence graph RDG. */
4985
4986 void
4987 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4988 {
4989 int i, j;
4990 gimple stmt;
4991
4992 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4993 {
4994 VEC (data_ref_loc, heap) *references;
4995 data_ref_loc *ref;
4996 struct vertex *v = &(rdg->vertices[i]);
4997 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4998 struct rdg_vertex_info **slot;
4999
5000 rvi->stmt = stmt;
5001 rvi->index = i;
5002 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
5003
5004 if (!*slot)
5005 *slot = rvi;
5006 else
5007 free (rvi);
5008
5009 v->data = XNEW (struct rdg_vertex);
5010 RDG_STMT (rdg, i) = stmt;
5011
5012 RDG_MEM_WRITE_STMT (rdg, i) = false;
5013 RDG_MEM_READS_STMT (rdg, i) = false;
5014 if (gimple_code (stmt) == GIMPLE_PHI)
5015 continue;
5016
5017 get_references_in_stmt (stmt, &references);
5018 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
5019 if (!ref->is_read)
5020 RDG_MEM_WRITE_STMT (rdg, i) = true;
5021 else
5022 RDG_MEM_READS_STMT (rdg, i) = true;
5023
5024 VEC_free (data_ref_loc, heap, references);
5025 }
5026 }
5027
5028 /* Initialize STMTS with all the statements of LOOP. When
5029 INCLUDE_PHIS is true, include also the PHI nodes. The order in
5030 which we discover statements is important as
5031 generate_loops_for_partition is using the same traversal for
5032 identifying statements. */
5033
5034 static void
5035 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5036 {
5037 unsigned int i;
5038 basic_block *bbs = get_loop_body_in_dom_order (loop);
5039
5040 for (i = 0; i < loop->num_nodes; i++)
5041 {
5042 basic_block bb = bbs[i];
5043 gimple_stmt_iterator bsi;
5044 gimple stmt;
5045
5046 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5047 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5048
5049 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5050 {
5051 stmt = gsi_stmt (bsi);
5052 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
5053 VEC_safe_push (gimple, heap, *stmts, stmt);
5054 }
5055 }
5056
5057 free (bbs);
5058 }
5059
5060 /* Returns true when all the dependences are computable. */
5061
5062 static bool
5063 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5064 {
5065 ddr_p ddr;
5066 unsigned int i;
5067
5068 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5069 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5070 return false;
5071
5072 return true;
5073 }
5074
5075 /* Computes a hash function for element ELT. */
5076
5077 static hashval_t
5078 hash_stmt_vertex_info (const void *elt)
5079 {
5080 const struct rdg_vertex_info *const rvi =
5081 (const struct rdg_vertex_info *) elt;
5082 gimple stmt = rvi->stmt;
5083
5084 return htab_hash_pointer (stmt);
5085 }
5086
5087 /* Compares database elements E1 and E2. */
5088
5089 static int
5090 eq_stmt_vertex_info (const void *e1, const void *e2)
5091 {
5092 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5093 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5094
5095 return elt1->stmt == elt2->stmt;
5096 }
5097
5098 /* Free the element E. */
5099
5100 static void
5101 hash_stmt_vertex_del (void *e)
5102 {
5103 free (e);
5104 }
5105
5106 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5107 statement of the loop nest, and one edge per data dependence or
5108 scalar dependence. */
5109
5110 struct graph *
5111 build_empty_rdg (int n_stmts)
5112 {
5113 int nb_data_refs = 10;
5114 struct graph *rdg = new_graph (n_stmts);
5115
5116 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5117 eq_stmt_vertex_info, hash_stmt_vertex_del);
5118 return rdg;
5119 }
5120
5121 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5122 statement of the loop nest, and one edge per data dependence or
5123 scalar dependence. */
5124
5125 struct graph *
5126 build_rdg (struct loop *loop,
5127 VEC (loop_p, heap) **loop_nest,
5128 VEC (ddr_p, heap) **dependence_relations,
5129 VEC (data_reference_p, heap) **datarefs)
5130 {
5131 struct graph *rdg = NULL;
5132 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5133
5134 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5135 dependence_relations);
5136
5137 if (known_dependences_p (*dependence_relations))
5138 {
5139 stmts_from_loop (loop, &stmts);
5140 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5141 create_rdg_vertices (rdg, stmts);
5142 create_rdg_edges (rdg, *dependence_relations);
5143 }
5144
5145 VEC_free (gimple, heap, stmts);
5146 return rdg;
5147 }
5148
5149 /* Free the reduced dependence graph RDG. */
5150
5151 void
5152 free_rdg (struct graph *rdg)
5153 {
5154 int i;
5155
5156 for (i = 0; i < rdg->n_vertices; i++)
5157 {
5158 struct vertex *v = &(rdg->vertices[i]);
5159 struct graph_edge *e;
5160
5161 for (e = v->succ; e; e = e->succ_next)
5162 free (e->data);
5163
5164 free (v->data);
5165 }
5166
5167 htab_delete (rdg->indices);
5168 free_graph (rdg);
5169 }
5170
5171 /* Initialize STMTS with all the statements of LOOP that contain a
5172 store to memory. */
5173
5174 void
5175 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5176 {
5177 unsigned int i;
5178 basic_block *bbs = get_loop_body_in_dom_order (loop);
5179
5180 for (i = 0; i < loop->num_nodes; i++)
5181 {
5182 basic_block bb = bbs[i];
5183 gimple_stmt_iterator bsi;
5184
5185 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5186 if (gimple_vdef (gsi_stmt (bsi)))
5187 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5188 }
5189
5190 free (bbs);
5191 }
5192
5193 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5194 that contains a data reference on its LHS with a stride of the same
5195 size as its unit type. */
5196
5197 bool
5198 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5199 {
5200 tree op0, op1;
5201 bool res;
5202 struct data_reference *dr;
5203
5204 if (!stmt
5205 || !gimple_vdef (stmt)
5206 || !is_gimple_assign (stmt)
5207 || !gimple_assign_single_p (stmt)
5208 || !(op1 = gimple_assign_rhs1 (stmt))
5209 || !(integer_zerop (op1) || real_zerop (op1)))
5210 return false;
5211
5212 dr = XCNEW (struct data_reference);
5213 op0 = gimple_assign_lhs (stmt);
5214
5215 DR_STMT (dr) = stmt;
5216 DR_REF (dr) = op0;
5217
5218 res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
5219 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5220
5221 free_data_ref (dr);
5222 return res;
5223 }
5224
5225 /* Initialize STMTS with all the statements of LOOP that contain a
5226 store to memory of the form "A[i] = 0". */
5227
5228 void
5229 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5230 {
5231 unsigned int i;
5232 basic_block bb;
5233 gimple_stmt_iterator si;
5234 gimple stmt;
5235 basic_block *bbs = get_loop_body_in_dom_order (loop);
5236
5237 for (i = 0; i < loop->num_nodes; i++)
5238 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5239 if ((stmt = gsi_stmt (si))
5240 && stmt_with_adjacent_zero_store_dr_p (stmt))
5241 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5242
5243 free (bbs);
5244 }
5245
5246 /* For a data reference REF, return the declaration of its base
5247 address or NULL_TREE if the base is not determined. */
5248
5249 static inline tree
5250 ref_base_address (gimple stmt, data_ref_loc *ref)
5251 {
5252 tree base = NULL_TREE;
5253 tree base_address;
5254 struct data_reference *dr = XCNEW (struct data_reference);
5255
5256 DR_STMT (dr) = stmt;
5257 DR_REF (dr) = *ref->pos;
5258 dr_analyze_innermost (dr, loop_containing_stmt (stmt));
5259 base_address = DR_BASE_ADDRESS (dr);
5260
5261 if (!base_address)
5262 goto end;
5263
5264 switch (TREE_CODE (base_address))
5265 {
5266 case ADDR_EXPR:
5267 base = TREE_OPERAND (base_address, 0);
5268 break;
5269
5270 default:
5271 base = base_address;
5272 break;
5273 }
5274
5275 end:
5276 free_data_ref (dr);
5277 return base;
5278 }
5279
5280 /* Determines whether the statement from vertex V of the RDG has a
5281 definition used outside the loop that contains this statement. */
5282
5283 bool
5284 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5285 {
5286 gimple stmt = RDG_STMT (rdg, v);
5287 struct loop *loop = loop_containing_stmt (stmt);
5288 use_operand_p imm_use_p;
5289 imm_use_iterator iterator;
5290 ssa_op_iter it;
5291 def_operand_p def_p;
5292
5293 if (!loop)
5294 return true;
5295
5296 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5297 {
5298 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5299 {
5300 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5301 return true;
5302 }
5303 }
5304
5305 return false;
5306 }
5307
5308 /* Determines whether statements S1 and S2 access to similar memory
5309 locations. Two memory accesses are considered similar when they
5310 have the same base address declaration, i.e. when their
5311 ref_base_address is the same. */
5312
5313 bool
5314 have_similar_memory_accesses (gimple s1, gimple s2)
5315 {
5316 bool res = false;
5317 unsigned i, j;
5318 VEC (data_ref_loc, heap) *refs1, *refs2;
5319 data_ref_loc *ref1, *ref2;
5320
5321 get_references_in_stmt (s1, &refs1);
5322 get_references_in_stmt (s2, &refs2);
5323
5324 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5325 {
5326 tree base1 = ref_base_address (s1, ref1);
5327
5328 if (base1)
5329 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5330 if (base1 == ref_base_address (s2, ref2))
5331 {
5332 res = true;
5333 goto end;
5334 }
5335 }
5336
5337 end:
5338 VEC_free (data_ref_loc, heap, refs1);
5339 VEC_free (data_ref_loc, heap, refs2);
5340 return res;
5341 }
5342
5343 /* Helper function for the hashtab. */
5344
5345 static int
5346 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5347 {
5348 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5349 CONST_CAST_GIMPLE ((const_gimple) s2));
5350 }
5351
5352 /* Helper function for the hashtab. */
5353
5354 static hashval_t
5355 ref_base_address_1 (const void *s)
5356 {
5357 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5358 unsigned i;
5359 VEC (data_ref_loc, heap) *refs;
5360 data_ref_loc *ref;
5361 hashval_t res = 0;
5362
5363 get_references_in_stmt (stmt, &refs);
5364
5365 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5366 if (!ref->is_read)
5367 {
5368 res = htab_hash_pointer (ref_base_address (stmt, ref));
5369 break;
5370 }
5371
5372 VEC_free (data_ref_loc, heap, refs);
5373 return res;
5374 }
5375
5376 /* Try to remove duplicated write data references from STMTS. */
5377
5378 void
5379 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5380 {
5381 unsigned i;
5382 gimple stmt;
5383 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5384 have_similar_memory_accesses_1, NULL);
5385
5386 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5387 {
5388 void **slot;
5389
5390 slot = htab_find_slot (seen, stmt, INSERT);
5391
5392 if (*slot)
5393 VEC_ordered_remove (gimple, *stmts, i);
5394 else
5395 {
5396 *slot = (void *) stmt;
5397 i++;
5398 }
5399 }
5400
5401 htab_delete (seen);
5402 }
5403
5404 /* Returns the index of PARAMETER in the parameters vector of the
5405 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5406
5407 int
5408 access_matrix_get_index_for_parameter (tree parameter,
5409 struct access_matrix *access_matrix)
5410 {
5411 int i;
5412 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5413 tree lambda_parameter;
5414
5415 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5416 if (lambda_parameter == parameter)
5417 return i + AM_NB_INDUCTION_VARS (access_matrix);
5418
5419 return -1;
5420 }