re PR middle-end/45098 (Missed induction variable optimization)
[gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
31
32 The goals of this analysis are:
33
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
37
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
40
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
46
47 - to define a knowledge base for storing the data dependence
48 information,
49
50 - to define an interface to access this data.
51
52
53 Definitions:
54
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
64
65 References:
66
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
73
74
75 */
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87
88 static struct datadep_stats
89 {
90 int num_dependence_tests;
91 int num_dependence_dependent;
92 int num_dependence_independent;
93 int num_dependence_undetermined;
94
95 int num_subscript_tests;
96 int num_subscript_undetermined;
97 int num_same_subscript_function;
98
99 int num_ziv;
100 int num_ziv_independent;
101 int num_ziv_dependent;
102 int num_ziv_unimplemented;
103
104 int num_siv;
105 int num_siv_independent;
106 int num_siv_dependent;
107 int num_siv_unimplemented;
108
109 int num_miv;
110 int num_miv_independent;
111 int num_miv_dependent;
112 int num_miv_unimplemented;
113 } dependence_stats;
114
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
116 struct data_reference *,
117 struct data_reference *,
118 struct loop *);
119 /* Returns true iff A divides B. */
120
121 static inline bool
122 tree_fold_divides_p (const_tree a, const_tree b)
123 {
124 gcc_assert (TREE_CODE (a) == INTEGER_CST);
125 gcc_assert (TREE_CODE (b) == INTEGER_CST);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
127 }
128
129 /* Returns true iff A divides B. */
130
131 static inline bool
132 int_divides_p (int a, int b)
133 {
134 return ((b % a) == 0);
135 }
136
137 \f
138
139 /* Dump into FILE all the data references from DATAREFS. */
140
141 void
142 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
143 {
144 unsigned int i;
145 struct data_reference *dr;
146
147 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
148 dump_data_reference (file, dr);
149 }
150
151 /* Dump into STDERR all the data references from DATAREFS. */
152
153 DEBUG_FUNCTION void
154 debug_data_references (VEC (data_reference_p, heap) *datarefs)
155 {
156 dump_data_references (stderr, datarefs);
157 }
158
159 /* Dump to STDERR all the dependence relations from DDRS. */
160
161 DEBUG_FUNCTION void
162 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
163 {
164 dump_data_dependence_relations (stderr, ddrs);
165 }
166
167 /* Dump into FILE all the dependence relations from DDRS. */
168
169 void
170 dump_data_dependence_relations (FILE *file,
171 VEC (ddr_p, heap) *ddrs)
172 {
173 unsigned int i;
174 struct data_dependence_relation *ddr;
175
176 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
177 dump_data_dependence_relation (file, ddr);
178 }
179
180 /* Print to STDERR the data_reference DR. */
181
182 DEBUG_FUNCTION void
183 debug_data_reference (struct data_reference *dr)
184 {
185 dump_data_reference (stderr, dr);
186 }
187
188 /* Dump function for a DATA_REFERENCE structure. */
189
190 void
191 dump_data_reference (FILE *outf,
192 struct data_reference *dr)
193 {
194 unsigned int i;
195
196 fprintf (outf, "#(Data Ref: \n");
197 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
198 fprintf (outf, "# stmt: ");
199 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
200 fprintf (outf, "# ref: ");
201 print_generic_stmt (outf, DR_REF (dr), 0);
202 fprintf (outf, "# base_object: ");
203 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
204
205 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
206 {
207 fprintf (outf, "# Access function %d: ", i);
208 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
209 }
210 fprintf (outf, "#)\n");
211 }
212
213 /* Dumps the affine function described by FN to the file OUTF. */
214
215 static void
216 dump_affine_function (FILE *outf, affine_fn fn)
217 {
218 unsigned i;
219 tree coef;
220
221 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
222 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
223 {
224 fprintf (outf, " + ");
225 print_generic_expr (outf, coef, TDF_SLIM);
226 fprintf (outf, " * x_%u", i);
227 }
228 }
229
230 /* Dumps the conflict function CF to the file OUTF. */
231
232 static void
233 dump_conflict_function (FILE *outf, conflict_function *cf)
234 {
235 unsigned i;
236
237 if (cf->n == NO_DEPENDENCE)
238 fprintf (outf, "no dependence\n");
239 else if (cf->n == NOT_KNOWN)
240 fprintf (outf, "not known\n");
241 else
242 {
243 for (i = 0; i < cf->n; i++)
244 {
245 fprintf (outf, "[");
246 dump_affine_function (outf, cf->fns[i]);
247 fprintf (outf, "]\n");
248 }
249 }
250 }
251
252 /* Dump function for a SUBSCRIPT structure. */
253
254 void
255 dump_subscript (FILE *outf, struct subscript *subscript)
256 {
257 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
258
259 fprintf (outf, "\n (subscript \n");
260 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
261 dump_conflict_function (outf, cf);
262 if (CF_NONTRIVIAL_P (cf))
263 {
264 tree last_iteration = SUB_LAST_CONFLICT (subscript);
265 fprintf (outf, " last_conflict: ");
266 print_generic_stmt (outf, last_iteration, 0);
267 }
268
269 cf = SUB_CONFLICTS_IN_B (subscript);
270 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
271 dump_conflict_function (outf, cf);
272 if (CF_NONTRIVIAL_P (cf))
273 {
274 tree last_iteration = SUB_LAST_CONFLICT (subscript);
275 fprintf (outf, " last_conflict: ");
276 print_generic_stmt (outf, last_iteration, 0);
277 }
278
279 fprintf (outf, " (Subscript distance: ");
280 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
281 fprintf (outf, " )\n");
282 fprintf (outf, " )\n");
283 }
284
285 /* Print the classic direction vector DIRV to OUTF. */
286
287 void
288 print_direction_vector (FILE *outf,
289 lambda_vector dirv,
290 int length)
291 {
292 int eq;
293
294 for (eq = 0; eq < length; eq++)
295 {
296 enum data_dependence_direction dir = ((enum data_dependence_direction)
297 dirv[eq]);
298
299 switch (dir)
300 {
301 case dir_positive:
302 fprintf (outf, " +");
303 break;
304 case dir_negative:
305 fprintf (outf, " -");
306 break;
307 case dir_equal:
308 fprintf (outf, " =");
309 break;
310 case dir_positive_or_equal:
311 fprintf (outf, " +=");
312 break;
313 case dir_positive_or_negative:
314 fprintf (outf, " +-");
315 break;
316 case dir_negative_or_equal:
317 fprintf (outf, " -=");
318 break;
319 case dir_star:
320 fprintf (outf, " *");
321 break;
322 default:
323 fprintf (outf, "indep");
324 break;
325 }
326 }
327 fprintf (outf, "\n");
328 }
329
330 /* Print a vector of direction vectors. */
331
332 void
333 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
334 int length)
335 {
336 unsigned j;
337 lambda_vector v;
338
339 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
340 print_direction_vector (outf, v, length);
341 }
342
343 /* Print out a vector VEC of length N to OUTFILE. */
344
345 static inline void
346 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
347 {
348 int i;
349
350 for (i = 0; i < n; i++)
351 fprintf (outfile, "%3d ", vector[i]);
352 fprintf (outfile, "\n");
353 }
354
355 /* Print a vector of distance vectors. */
356
357 void
358 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
359 int length)
360 {
361 unsigned j;
362 lambda_vector v;
363
364 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
365 print_lambda_vector (outf, v, length);
366 }
367
368 /* Debug version. */
369
370 DEBUG_FUNCTION void
371 debug_data_dependence_relation (struct data_dependence_relation *ddr)
372 {
373 dump_data_dependence_relation (stderr, ddr);
374 }
375
376 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
377
378 void
379 dump_data_dependence_relation (FILE *outf,
380 struct data_dependence_relation *ddr)
381 {
382 struct data_reference *dra, *drb;
383
384 fprintf (outf, "(Data Dep: \n");
385
386 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
387 {
388 if (ddr)
389 {
390 dra = DDR_A (ddr);
391 drb = DDR_B (ddr);
392 if (dra)
393 dump_data_reference (outf, dra);
394 else
395 fprintf (outf, " (nil)\n");
396 if (drb)
397 dump_data_reference (outf, drb);
398 else
399 fprintf (outf, " (nil)\n");
400 }
401 fprintf (outf, " (don't know)\n)\n");
402 return;
403 }
404
405 dra = DDR_A (ddr);
406 drb = DDR_B (ddr);
407 dump_data_reference (outf, dra);
408 dump_data_reference (outf, drb);
409
410 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
411 fprintf (outf, " (no dependence)\n");
412
413 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
414 {
415 unsigned int i;
416 struct loop *loopi;
417
418 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
419 {
420 fprintf (outf, " access_fn_A: ");
421 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
422 fprintf (outf, " access_fn_B: ");
423 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
424 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
425 }
426
427 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
428 fprintf (outf, " loop nest: (");
429 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
430 fprintf (outf, "%d ", loopi->num);
431 fprintf (outf, ")\n");
432
433 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
434 {
435 fprintf (outf, " distance_vector: ");
436 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
437 DDR_NB_LOOPS (ddr));
438 }
439
440 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
441 {
442 fprintf (outf, " direction_vector: ");
443 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
444 DDR_NB_LOOPS (ddr));
445 }
446 }
447
448 fprintf (outf, ")\n");
449 }
450
451 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
452
453 void
454 dump_data_dependence_direction (FILE *file,
455 enum data_dependence_direction dir)
456 {
457 switch (dir)
458 {
459 case dir_positive:
460 fprintf (file, "+");
461 break;
462
463 case dir_negative:
464 fprintf (file, "-");
465 break;
466
467 case dir_equal:
468 fprintf (file, "=");
469 break;
470
471 case dir_positive_or_negative:
472 fprintf (file, "+-");
473 break;
474
475 case dir_positive_or_equal:
476 fprintf (file, "+=");
477 break;
478
479 case dir_negative_or_equal:
480 fprintf (file, "-=");
481 break;
482
483 case dir_star:
484 fprintf (file, "*");
485 break;
486
487 default:
488 break;
489 }
490 }
491
492 /* Dumps the distance and direction vectors in FILE. DDRS contains
493 the dependence relations, and VECT_SIZE is the size of the
494 dependence vectors, or in other words the number of loops in the
495 considered nest. */
496
497 void
498 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
499 {
500 unsigned int i, j;
501 struct data_dependence_relation *ddr;
502 lambda_vector v;
503
504 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
505 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
506 {
507 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
508 {
509 fprintf (file, "DISTANCE_V (");
510 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
511 fprintf (file, ")\n");
512 }
513
514 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
515 {
516 fprintf (file, "DIRECTION_V (");
517 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
518 fprintf (file, ")\n");
519 }
520 }
521
522 fprintf (file, "\n\n");
523 }
524
525 /* Dumps the data dependence relations DDRS in FILE. */
526
527 void
528 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
529 {
530 unsigned int i;
531 struct data_dependence_relation *ddr;
532
533 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
534 dump_data_dependence_relation (file, ddr);
535
536 fprintf (file, "\n\n");
537 }
538
539 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
540 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
541 constant of type ssizetype, and returns true. If we cannot do this
542 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
543 is returned. */
544
545 static bool
546 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
547 tree *var, tree *off)
548 {
549 tree var0, var1;
550 tree off0, off1;
551 enum tree_code ocode = code;
552
553 *var = NULL_TREE;
554 *off = NULL_TREE;
555
556 switch (code)
557 {
558 case INTEGER_CST:
559 *var = build_int_cst (type, 0);
560 *off = fold_convert (ssizetype, op0);
561 return true;
562
563 case POINTER_PLUS_EXPR:
564 ocode = PLUS_EXPR;
565 /* FALLTHROUGH */
566 case PLUS_EXPR:
567 case MINUS_EXPR:
568 split_constant_offset (op0, &var0, &off0);
569 split_constant_offset (op1, &var1, &off1);
570 *var = fold_build2 (code, type, var0, var1);
571 *off = size_binop (ocode, off0, off1);
572 return true;
573
574 case MULT_EXPR:
575 if (TREE_CODE (op1) != INTEGER_CST)
576 return false;
577
578 split_constant_offset (op0, &var0, &off0);
579 *var = fold_build2 (MULT_EXPR, type, var0, op1);
580 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
581 return true;
582
583 case ADDR_EXPR:
584 {
585 tree base, poffset;
586 HOST_WIDE_INT pbitsize, pbitpos;
587 enum machine_mode pmode;
588 int punsignedp, pvolatilep;
589
590 op0 = TREE_OPERAND (op0, 0);
591 if (!handled_component_p (op0))
592 return false;
593
594 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
595 &pmode, &punsignedp, &pvolatilep, false);
596
597 if (pbitpos % BITS_PER_UNIT != 0)
598 return false;
599 base = build_fold_addr_expr (base);
600 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
601
602 if (poffset)
603 {
604 split_constant_offset (poffset, &poffset, &off1);
605 off0 = size_binop (PLUS_EXPR, off0, off1);
606 if (POINTER_TYPE_P (TREE_TYPE (base)))
607 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
608 base, fold_convert (sizetype, poffset));
609 else
610 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
611 fold_convert (TREE_TYPE (base), poffset));
612 }
613
614 var0 = fold_convert (type, base);
615
616 /* If variable length types are involved, punt, otherwise casts
617 might be converted into ARRAY_REFs in gimplify_conversion.
618 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
619 possibly no longer appears in current GIMPLE, might resurface.
620 This perhaps could run
621 if (CONVERT_EXPR_P (var0))
622 {
623 gimplify_conversion (&var0);
624 // Attempt to fill in any within var0 found ARRAY_REF's
625 // element size from corresponding op embedded ARRAY_REF,
626 // if unsuccessful, just punt.
627 } */
628 while (POINTER_TYPE_P (type))
629 type = TREE_TYPE (type);
630 if (int_size_in_bytes (type) < 0)
631 return false;
632
633 *var = var0;
634 *off = off0;
635 return true;
636 }
637
638 case SSA_NAME:
639 {
640 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
641 enum tree_code subcode;
642
643 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
644 return false;
645
646 var0 = gimple_assign_rhs1 (def_stmt);
647 subcode = gimple_assign_rhs_code (def_stmt);
648 var1 = gimple_assign_rhs2 (def_stmt);
649
650 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
651 }
652 CASE_CONVERT:
653 {
654 /* We must not introduce undefined overflow, and we must not change the value.
655 Hence we're okay if the inner type doesn't overflow to start with
656 (pointer or signed), the outer type also is an integer or pointer
657 and the outer precision is at least as large as the inner. */
658 tree itype = TREE_TYPE (op0);
659 if ((POINTER_TYPE_P (itype)
660 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
661 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
662 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
663 {
664 split_constant_offset (op0, &var0, off);
665 *var = fold_convert (type, var0);
666 return true;
667 }
668 return false;
669 }
670
671 default:
672 return false;
673 }
674 }
675
676 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
677 will be ssizetype. */
678
679 void
680 split_constant_offset (tree exp, tree *var, tree *off)
681 {
682 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
683 enum tree_code code;
684
685 *var = exp;
686 *off = ssize_int (0);
687 STRIP_NOPS (exp);
688
689 if (tree_is_chrec (exp))
690 return;
691
692 otype = TREE_TYPE (exp);
693 code = TREE_CODE (exp);
694 extract_ops_from_tree (exp, &code, &op0, &op1);
695 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
696 {
697 *var = fold_convert (type, e);
698 *off = o;
699 }
700 }
701
702 /* Returns the address ADDR of an object in a canonical shape (without nop
703 casts, and with type of pointer to the object). */
704
705 static tree
706 canonicalize_base_object_address (tree addr)
707 {
708 tree orig = addr;
709
710 STRIP_NOPS (addr);
711
712 /* The base address may be obtained by casting from integer, in that case
713 keep the cast. */
714 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
715 return orig;
716
717 if (TREE_CODE (addr) != ADDR_EXPR)
718 return addr;
719
720 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
721 }
722
723 /* Analyzes the behavior of the memory reference DR in the innermost loop or
724 basic block that contains it. Returns true if analysis succeed or false
725 otherwise. */
726
727 bool
728 dr_analyze_innermost (struct data_reference *dr)
729 {
730 gimple stmt = DR_STMT (dr);
731 struct loop *loop = loop_containing_stmt (stmt);
732 tree ref = DR_REF (dr);
733 HOST_WIDE_INT pbitsize, pbitpos;
734 tree base, poffset;
735 enum machine_mode pmode;
736 int punsignedp, pvolatilep;
737 affine_iv base_iv, offset_iv;
738 tree init, dinit, step;
739 bool in_loop = (loop && loop->num);
740
741 if (dump_file && (dump_flags & TDF_DETAILS))
742 fprintf (dump_file, "analyze_innermost: ");
743
744 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
745 &pmode, &punsignedp, &pvolatilep, false);
746 gcc_assert (base != NULL_TREE);
747
748 if (pbitpos % BITS_PER_UNIT != 0)
749 {
750 if (dump_file && (dump_flags & TDF_DETAILS))
751 fprintf (dump_file, "failed: bit offset alignment.\n");
752 return false;
753 }
754
755 if (TREE_CODE (base) == MEM_REF)
756 {
757 if (!integer_zerop (TREE_OPERAND (base, 1)))
758 {
759 if (!poffset)
760 {
761 double_int moff = mem_ref_offset (base);
762 poffset = double_int_to_tree (sizetype, moff);
763 }
764 else
765 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
766 }
767 base = TREE_OPERAND (base, 0);
768 }
769 else
770 base = build_fold_addr_expr (base);
771 if (in_loop)
772 {
773 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
774 false))
775 {
776 if (dump_file && (dump_flags & TDF_DETAILS))
777 fprintf (dump_file, "failed: evolution of base is not affine.\n");
778 return false;
779 }
780 }
781 else
782 {
783 base_iv.base = base;
784 base_iv.step = ssize_int (0);
785 base_iv.no_overflow = true;
786 }
787
788 if (!poffset)
789 {
790 offset_iv.base = ssize_int (0);
791 offset_iv.step = ssize_int (0);
792 }
793 else
794 {
795 if (!in_loop)
796 {
797 offset_iv.base = poffset;
798 offset_iv.step = ssize_int (0);
799 }
800 else if (!simple_iv (loop, loop_containing_stmt (stmt),
801 poffset, &offset_iv, false))
802 {
803 if (dump_file && (dump_flags & TDF_DETAILS))
804 fprintf (dump_file, "failed: evolution of offset is not"
805 " affine.\n");
806 return false;
807 }
808 }
809
810 init = ssize_int (pbitpos / BITS_PER_UNIT);
811 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
812 init = size_binop (PLUS_EXPR, init, dinit);
813 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
814 init = size_binop (PLUS_EXPR, init, dinit);
815
816 step = size_binop (PLUS_EXPR,
817 fold_convert (ssizetype, base_iv.step),
818 fold_convert (ssizetype, offset_iv.step));
819
820 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
821
822 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
823 DR_INIT (dr) = init;
824 DR_STEP (dr) = step;
825
826 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
827
828 if (dump_file && (dump_flags & TDF_DETAILS))
829 fprintf (dump_file, "success.\n");
830
831 return true;
832 }
833
834 /* Determines the base object and the list of indices of memory reference
835 DR, analyzed in LOOP and instantiated in loop nest NEST. */
836
837 static void
838 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
839 {
840 VEC (tree, heap) *access_fns = NULL;
841 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
842 tree base, off, access_fn = NULL_TREE;
843 basic_block before_loop = NULL;
844
845 if (nest)
846 before_loop = block_before_loop (nest);
847
848 while (handled_component_p (aref))
849 {
850 if (TREE_CODE (aref) == ARRAY_REF)
851 {
852 op = TREE_OPERAND (aref, 1);
853 if (nest)
854 {
855 access_fn = analyze_scalar_evolution (loop, op);
856 access_fn = instantiate_scev (before_loop, loop, access_fn);
857 VEC_safe_push (tree, heap, access_fns, access_fn);
858 }
859
860 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
861 }
862
863 aref = TREE_OPERAND (aref, 0);
864 }
865
866 if (nest
867 && (INDIRECT_REF_P (aref)
868 || TREE_CODE (aref) == MEM_REF))
869 {
870 op = TREE_OPERAND (aref, 0);
871 access_fn = analyze_scalar_evolution (loop, op);
872 access_fn = instantiate_scev (before_loop, loop, access_fn);
873 base = initial_condition (access_fn);
874 split_constant_offset (base, &base, &off);
875 if (TREE_CODE (aref) == MEM_REF)
876 off = size_binop (PLUS_EXPR, off,
877 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
878 access_fn = chrec_replace_initial_condition (access_fn,
879 fold_convert (TREE_TYPE (base), off));
880
881 TREE_OPERAND (aref, 0) = base;
882 VEC_safe_push (tree, heap, access_fns, access_fn);
883 }
884
885 if (TREE_CODE (aref) == MEM_REF)
886 TREE_OPERAND (aref, 1)
887 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
888
889 if (TREE_CODE (ref) == MEM_REF
890 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
891 && integer_zerop (TREE_OPERAND (ref, 1)))
892 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
893
894 /* For canonicalization purposes we'd like to strip all outermost
895 zero-offset component-refs.
896 ??? For now simply handle zero-index array-refs. */
897 while (TREE_CODE (ref) == ARRAY_REF
898 && integer_zerop (TREE_OPERAND (ref, 1)))
899 ref = TREE_OPERAND (ref, 0);
900
901 DR_BASE_OBJECT (dr) = ref;
902 DR_ACCESS_FNS (dr) = access_fns;
903 }
904
905 /* Extracts the alias analysis information from the memory reference DR. */
906
907 static void
908 dr_analyze_alias (struct data_reference *dr)
909 {
910 tree ref = DR_REF (dr);
911 tree base = get_base_address (ref), addr;
912
913 if (INDIRECT_REF_P (base)
914 || TREE_CODE (base) == MEM_REF)
915 {
916 addr = TREE_OPERAND (base, 0);
917 if (TREE_CODE (addr) == SSA_NAME)
918 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
919 }
920 }
921
922 /* Returns true if the address of DR is invariant. */
923
924 static bool
925 dr_address_invariant_p (struct data_reference *dr)
926 {
927 unsigned i;
928 tree idx;
929
930 FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx)
931 if (tree_contains_chrecs (idx, NULL))
932 return false;
933
934 return true;
935 }
936
937 /* Frees data reference DR. */
938
939 void
940 free_data_ref (data_reference_p dr)
941 {
942 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
943 free (dr);
944 }
945
946 /* Analyzes memory reference MEMREF accessed in STMT. The reference
947 is read if IS_READ is true, write otherwise. Returns the
948 data_reference description of MEMREF. NEST is the outermost loop
949 in which the reference should be instantiated, LOOP is the loop in
950 which the data reference should be analyzed. */
951
952 struct data_reference *
953 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
954 bool is_read)
955 {
956 struct data_reference *dr;
957
958 if (dump_file && (dump_flags & TDF_DETAILS))
959 {
960 fprintf (dump_file, "Creating dr for ");
961 print_generic_expr (dump_file, memref, TDF_SLIM);
962 fprintf (dump_file, "\n");
963 }
964
965 dr = XCNEW (struct data_reference);
966 DR_STMT (dr) = stmt;
967 DR_REF (dr) = memref;
968 DR_IS_READ (dr) = is_read;
969
970 dr_analyze_innermost (dr);
971 dr_analyze_indices (dr, nest, loop);
972 dr_analyze_alias (dr);
973
974 if (dump_file && (dump_flags & TDF_DETAILS))
975 {
976 fprintf (dump_file, "\tbase_address: ");
977 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
978 fprintf (dump_file, "\n\toffset from base address: ");
979 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
980 fprintf (dump_file, "\n\tconstant offset from base address: ");
981 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
982 fprintf (dump_file, "\n\tstep: ");
983 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
984 fprintf (dump_file, "\n\taligned to: ");
985 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
986 fprintf (dump_file, "\n\tbase_object: ");
987 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
988 fprintf (dump_file, "\n");
989 }
990
991 return dr;
992 }
993
994 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
995 expressions. */
996 static bool
997 dr_equal_offsets_p1 (tree offset1, tree offset2)
998 {
999 bool res;
1000
1001 STRIP_NOPS (offset1);
1002 STRIP_NOPS (offset2);
1003
1004 if (offset1 == offset2)
1005 return true;
1006
1007 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1008 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1009 return false;
1010
1011 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1012 TREE_OPERAND (offset2, 0));
1013
1014 if (!res || !BINARY_CLASS_P (offset1))
1015 return res;
1016
1017 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1018 TREE_OPERAND (offset2, 1));
1019
1020 return res;
1021 }
1022
1023 /* Check if DRA and DRB have equal offsets. */
1024 bool
1025 dr_equal_offsets_p (struct data_reference *dra,
1026 struct data_reference *drb)
1027 {
1028 tree offset1, offset2;
1029
1030 offset1 = DR_OFFSET (dra);
1031 offset2 = DR_OFFSET (drb);
1032
1033 return dr_equal_offsets_p1 (offset1, offset2);
1034 }
1035
1036 /* Returns true if FNA == FNB. */
1037
1038 static bool
1039 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1040 {
1041 unsigned i, n = VEC_length (tree, fna);
1042
1043 if (n != VEC_length (tree, fnb))
1044 return false;
1045
1046 for (i = 0; i < n; i++)
1047 if (!operand_equal_p (VEC_index (tree, fna, i),
1048 VEC_index (tree, fnb, i), 0))
1049 return false;
1050
1051 return true;
1052 }
1053
1054 /* If all the functions in CF are the same, returns one of them,
1055 otherwise returns NULL. */
1056
1057 static affine_fn
1058 common_affine_function (conflict_function *cf)
1059 {
1060 unsigned i;
1061 affine_fn comm;
1062
1063 if (!CF_NONTRIVIAL_P (cf))
1064 return NULL;
1065
1066 comm = cf->fns[0];
1067
1068 for (i = 1; i < cf->n; i++)
1069 if (!affine_function_equal_p (comm, cf->fns[i]))
1070 return NULL;
1071
1072 return comm;
1073 }
1074
1075 /* Returns the base of the affine function FN. */
1076
1077 static tree
1078 affine_function_base (affine_fn fn)
1079 {
1080 return VEC_index (tree, fn, 0);
1081 }
1082
1083 /* Returns true if FN is a constant. */
1084
1085 static bool
1086 affine_function_constant_p (affine_fn fn)
1087 {
1088 unsigned i;
1089 tree coef;
1090
1091 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1092 if (!integer_zerop (coef))
1093 return false;
1094
1095 return true;
1096 }
1097
1098 /* Returns true if FN is the zero constant function. */
1099
1100 static bool
1101 affine_function_zero_p (affine_fn fn)
1102 {
1103 return (integer_zerop (affine_function_base (fn))
1104 && affine_function_constant_p (fn));
1105 }
1106
1107 /* Returns a signed integer type with the largest precision from TA
1108 and TB. */
1109
1110 static tree
1111 signed_type_for_types (tree ta, tree tb)
1112 {
1113 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1114 return signed_type_for (ta);
1115 else
1116 return signed_type_for (tb);
1117 }
1118
1119 /* Applies operation OP on affine functions FNA and FNB, and returns the
1120 result. */
1121
1122 static affine_fn
1123 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1124 {
1125 unsigned i, n, m;
1126 affine_fn ret;
1127 tree coef;
1128
1129 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1130 {
1131 n = VEC_length (tree, fna);
1132 m = VEC_length (tree, fnb);
1133 }
1134 else
1135 {
1136 n = VEC_length (tree, fnb);
1137 m = VEC_length (tree, fna);
1138 }
1139
1140 ret = VEC_alloc (tree, heap, m);
1141 for (i = 0; i < n; i++)
1142 {
1143 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1144 TREE_TYPE (VEC_index (tree, fnb, i)));
1145
1146 VEC_quick_push (tree, ret,
1147 fold_build2 (op, type,
1148 VEC_index (tree, fna, i),
1149 VEC_index (tree, fnb, i)));
1150 }
1151
1152 for (; VEC_iterate (tree, fna, i, coef); i++)
1153 VEC_quick_push (tree, ret,
1154 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1155 coef, integer_zero_node));
1156 for (; VEC_iterate (tree, fnb, i, coef); i++)
1157 VEC_quick_push (tree, ret,
1158 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1159 integer_zero_node, coef));
1160
1161 return ret;
1162 }
1163
1164 /* Returns the sum of affine functions FNA and FNB. */
1165
1166 static affine_fn
1167 affine_fn_plus (affine_fn fna, affine_fn fnb)
1168 {
1169 return affine_fn_op (PLUS_EXPR, fna, fnb);
1170 }
1171
1172 /* Returns the difference of affine functions FNA and FNB. */
1173
1174 static affine_fn
1175 affine_fn_minus (affine_fn fna, affine_fn fnb)
1176 {
1177 return affine_fn_op (MINUS_EXPR, fna, fnb);
1178 }
1179
1180 /* Frees affine function FN. */
1181
1182 static void
1183 affine_fn_free (affine_fn fn)
1184 {
1185 VEC_free (tree, heap, fn);
1186 }
1187
1188 /* Determine for each subscript in the data dependence relation DDR
1189 the distance. */
1190
1191 static void
1192 compute_subscript_distance (struct data_dependence_relation *ddr)
1193 {
1194 conflict_function *cf_a, *cf_b;
1195 affine_fn fn_a, fn_b, diff;
1196
1197 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1198 {
1199 unsigned int i;
1200
1201 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1202 {
1203 struct subscript *subscript;
1204
1205 subscript = DDR_SUBSCRIPT (ddr, i);
1206 cf_a = SUB_CONFLICTS_IN_A (subscript);
1207 cf_b = SUB_CONFLICTS_IN_B (subscript);
1208
1209 fn_a = common_affine_function (cf_a);
1210 fn_b = common_affine_function (cf_b);
1211 if (!fn_a || !fn_b)
1212 {
1213 SUB_DISTANCE (subscript) = chrec_dont_know;
1214 return;
1215 }
1216 diff = affine_fn_minus (fn_a, fn_b);
1217
1218 if (affine_function_constant_p (diff))
1219 SUB_DISTANCE (subscript) = affine_function_base (diff);
1220 else
1221 SUB_DISTANCE (subscript) = chrec_dont_know;
1222
1223 affine_fn_free (diff);
1224 }
1225 }
1226 }
1227
1228 /* Returns the conflict function for "unknown". */
1229
1230 static conflict_function *
1231 conflict_fn_not_known (void)
1232 {
1233 conflict_function *fn = XCNEW (conflict_function);
1234 fn->n = NOT_KNOWN;
1235
1236 return fn;
1237 }
1238
1239 /* Returns the conflict function for "independent". */
1240
1241 static conflict_function *
1242 conflict_fn_no_dependence (void)
1243 {
1244 conflict_function *fn = XCNEW (conflict_function);
1245 fn->n = NO_DEPENDENCE;
1246
1247 return fn;
1248 }
1249
1250 /* Returns true if the address of OBJ is invariant in LOOP. */
1251
1252 static bool
1253 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1254 {
1255 while (handled_component_p (obj))
1256 {
1257 if (TREE_CODE (obj) == ARRAY_REF)
1258 {
1259 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1260 need to check the stride and the lower bound of the reference. */
1261 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1262 loop->num)
1263 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1264 loop->num))
1265 return false;
1266 }
1267 else if (TREE_CODE (obj) == COMPONENT_REF)
1268 {
1269 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1270 loop->num))
1271 return false;
1272 }
1273 obj = TREE_OPERAND (obj, 0);
1274 }
1275
1276 if (!INDIRECT_REF_P (obj)
1277 && TREE_CODE (obj) != MEM_REF)
1278 return true;
1279
1280 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1281 loop->num);
1282 }
1283
1284 /* Returns false if we can prove that data references A and B do not alias,
1285 true otherwise. */
1286
1287 bool
1288 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1289 {
1290 tree addr_a = DR_BASE_OBJECT (a);
1291 tree addr_b = DR_BASE_OBJECT (b);
1292
1293 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1294 return refs_output_dependent_p (addr_a, addr_b);
1295 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1296 return refs_anti_dependent_p (addr_a, addr_b);
1297 return refs_may_alias_p (addr_a, addr_b);
1298 }
1299
1300 static void compute_self_dependence (struct data_dependence_relation *);
1301
1302 /* Initialize a data dependence relation between data accesses A and
1303 B. NB_LOOPS is the number of loops surrounding the references: the
1304 size of the classic distance/direction vectors. */
1305
1306 static struct data_dependence_relation *
1307 initialize_data_dependence_relation (struct data_reference *a,
1308 struct data_reference *b,
1309 VEC (loop_p, heap) *loop_nest)
1310 {
1311 struct data_dependence_relation *res;
1312 unsigned int i;
1313
1314 res = XNEW (struct data_dependence_relation);
1315 DDR_A (res) = a;
1316 DDR_B (res) = b;
1317 DDR_LOOP_NEST (res) = NULL;
1318 DDR_REVERSED_P (res) = false;
1319 DDR_SUBSCRIPTS (res) = NULL;
1320 DDR_DIR_VECTS (res) = NULL;
1321 DDR_DIST_VECTS (res) = NULL;
1322
1323 if (a == NULL || b == NULL)
1324 {
1325 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1326 return res;
1327 }
1328
1329 /* If the data references do not alias, then they are independent. */
1330 if (!dr_may_alias_p (a, b))
1331 {
1332 DDR_ARE_DEPENDENT (res) = chrec_known;
1333 return res;
1334 }
1335
1336 /* When the references are exactly the same, don't spend time doing
1337 the data dependence tests, just initialize the ddr and return. */
1338 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1339 {
1340 DDR_AFFINE_P (res) = true;
1341 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1342 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1343 DDR_LOOP_NEST (res) = loop_nest;
1344 DDR_INNER_LOOP (res) = 0;
1345 DDR_SELF_REFERENCE (res) = true;
1346 compute_self_dependence (res);
1347 return res;
1348 }
1349
1350 /* If the references do not access the same object, we do not know
1351 whether they alias or not. */
1352 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1353 {
1354 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1355 return res;
1356 }
1357
1358 /* If the base of the object is not invariant in the loop nest, we cannot
1359 analyze it. TODO -- in fact, it would suffice to record that there may
1360 be arbitrary dependences in the loops where the base object varies. */
1361 if (loop_nest
1362 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1363 DR_BASE_OBJECT (a)))
1364 {
1365 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1366 return res;
1367 }
1368
1369 /* If the number of dimensions of the access to not agree we can have
1370 a pointer access to a component of the array element type and an
1371 array access while the base-objects are still the same. Punt. */
1372 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1373 {
1374 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1375 return res;
1376 }
1377
1378 DDR_AFFINE_P (res) = true;
1379 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1380 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1381 DDR_LOOP_NEST (res) = loop_nest;
1382 DDR_INNER_LOOP (res) = 0;
1383 DDR_SELF_REFERENCE (res) = false;
1384
1385 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1386 {
1387 struct subscript *subscript;
1388
1389 subscript = XNEW (struct subscript);
1390 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1391 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1392 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1393 SUB_DISTANCE (subscript) = chrec_dont_know;
1394 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1395 }
1396
1397 return res;
1398 }
1399
1400 /* Frees memory used by the conflict function F. */
1401
1402 static void
1403 free_conflict_function (conflict_function *f)
1404 {
1405 unsigned i;
1406
1407 if (CF_NONTRIVIAL_P (f))
1408 {
1409 for (i = 0; i < f->n; i++)
1410 affine_fn_free (f->fns[i]);
1411 }
1412 free (f);
1413 }
1414
1415 /* Frees memory used by SUBSCRIPTS. */
1416
1417 static void
1418 free_subscripts (VEC (subscript_p, heap) *subscripts)
1419 {
1420 unsigned i;
1421 subscript_p s;
1422
1423 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1424 {
1425 free_conflict_function (s->conflicting_iterations_in_a);
1426 free_conflict_function (s->conflicting_iterations_in_b);
1427 free (s);
1428 }
1429 VEC_free (subscript_p, heap, subscripts);
1430 }
1431
1432 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1433 description. */
1434
1435 static inline void
1436 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1437 tree chrec)
1438 {
1439 if (dump_file && (dump_flags & TDF_DETAILS))
1440 {
1441 fprintf (dump_file, "(dependence classified: ");
1442 print_generic_expr (dump_file, chrec, 0);
1443 fprintf (dump_file, ")\n");
1444 }
1445
1446 DDR_ARE_DEPENDENT (ddr) = chrec;
1447 free_subscripts (DDR_SUBSCRIPTS (ddr));
1448 DDR_SUBSCRIPTS (ddr) = NULL;
1449 }
1450
1451 /* The dependence relation DDR cannot be represented by a distance
1452 vector. */
1453
1454 static inline void
1455 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1456 {
1457 if (dump_file && (dump_flags & TDF_DETAILS))
1458 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1459
1460 DDR_AFFINE_P (ddr) = false;
1461 }
1462
1463 \f
1464
1465 /* This section contains the classic Banerjee tests. */
1466
1467 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1468 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1469
1470 static inline bool
1471 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1472 {
1473 return (evolution_function_is_constant_p (chrec_a)
1474 && evolution_function_is_constant_p (chrec_b));
1475 }
1476
1477 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1478 variable, i.e., if the SIV (Single Index Variable) test is true. */
1479
1480 static bool
1481 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1482 {
1483 if ((evolution_function_is_constant_p (chrec_a)
1484 && evolution_function_is_univariate_p (chrec_b))
1485 || (evolution_function_is_constant_p (chrec_b)
1486 && evolution_function_is_univariate_p (chrec_a)))
1487 return true;
1488
1489 if (evolution_function_is_univariate_p (chrec_a)
1490 && evolution_function_is_univariate_p (chrec_b))
1491 {
1492 switch (TREE_CODE (chrec_a))
1493 {
1494 case POLYNOMIAL_CHREC:
1495 switch (TREE_CODE (chrec_b))
1496 {
1497 case POLYNOMIAL_CHREC:
1498 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1499 return false;
1500
1501 default:
1502 return true;
1503 }
1504
1505 default:
1506 return true;
1507 }
1508 }
1509
1510 return false;
1511 }
1512
1513 /* Creates a conflict function with N dimensions. The affine functions
1514 in each dimension follow. */
1515
1516 static conflict_function *
1517 conflict_fn (unsigned n, ...)
1518 {
1519 unsigned i;
1520 conflict_function *ret = XCNEW (conflict_function);
1521 va_list ap;
1522
1523 gcc_assert (0 < n && n <= MAX_DIM);
1524 va_start(ap, n);
1525
1526 ret->n = n;
1527 for (i = 0; i < n; i++)
1528 ret->fns[i] = va_arg (ap, affine_fn);
1529 va_end(ap);
1530
1531 return ret;
1532 }
1533
1534 /* Returns constant affine function with value CST. */
1535
1536 static affine_fn
1537 affine_fn_cst (tree cst)
1538 {
1539 affine_fn fn = VEC_alloc (tree, heap, 1);
1540 VEC_quick_push (tree, fn, cst);
1541 return fn;
1542 }
1543
1544 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1545
1546 static affine_fn
1547 affine_fn_univar (tree cst, unsigned dim, tree coef)
1548 {
1549 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1550 unsigned i;
1551
1552 gcc_assert (dim > 0);
1553 VEC_quick_push (tree, fn, cst);
1554 for (i = 1; i < dim; i++)
1555 VEC_quick_push (tree, fn, integer_zero_node);
1556 VEC_quick_push (tree, fn, coef);
1557 return fn;
1558 }
1559
1560 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1561 *OVERLAPS_B are initialized to the functions that describe the
1562 relation between the elements accessed twice by CHREC_A and
1563 CHREC_B. For k >= 0, the following property is verified:
1564
1565 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1566
1567 static void
1568 analyze_ziv_subscript (tree chrec_a,
1569 tree chrec_b,
1570 conflict_function **overlaps_a,
1571 conflict_function **overlaps_b,
1572 tree *last_conflicts)
1573 {
1574 tree type, difference;
1575 dependence_stats.num_ziv++;
1576
1577 if (dump_file && (dump_flags & TDF_DETAILS))
1578 fprintf (dump_file, "(analyze_ziv_subscript \n");
1579
1580 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1581 chrec_a = chrec_convert (type, chrec_a, NULL);
1582 chrec_b = chrec_convert (type, chrec_b, NULL);
1583 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1584
1585 switch (TREE_CODE (difference))
1586 {
1587 case INTEGER_CST:
1588 if (integer_zerop (difference))
1589 {
1590 /* The difference is equal to zero: the accessed index
1591 overlaps for each iteration in the loop. */
1592 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1593 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1594 *last_conflicts = chrec_dont_know;
1595 dependence_stats.num_ziv_dependent++;
1596 }
1597 else
1598 {
1599 /* The accesses do not overlap. */
1600 *overlaps_a = conflict_fn_no_dependence ();
1601 *overlaps_b = conflict_fn_no_dependence ();
1602 *last_conflicts = integer_zero_node;
1603 dependence_stats.num_ziv_independent++;
1604 }
1605 break;
1606
1607 default:
1608 /* We're not sure whether the indexes overlap. For the moment,
1609 conservatively answer "don't know". */
1610 if (dump_file && (dump_flags & TDF_DETAILS))
1611 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1612
1613 *overlaps_a = conflict_fn_not_known ();
1614 *overlaps_b = conflict_fn_not_known ();
1615 *last_conflicts = chrec_dont_know;
1616 dependence_stats.num_ziv_unimplemented++;
1617 break;
1618 }
1619
1620 if (dump_file && (dump_flags & TDF_DETAILS))
1621 fprintf (dump_file, ")\n");
1622 }
1623
1624 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1625 and only if it fits to the int type. If this is not the case, or the
1626 bound on the number of iterations of LOOP could not be derived, returns
1627 chrec_dont_know. */
1628
1629 static tree
1630 max_stmt_executions_tree (struct loop *loop)
1631 {
1632 double_int nit;
1633 tree type;
1634
1635 if (!max_stmt_executions (loop, true, &nit))
1636 return chrec_dont_know;
1637
1638 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1639 if (!double_int_fits_to_tree_p (type, nit))
1640 return chrec_dont_know;
1641
1642 return double_int_to_tree (type, nit);
1643 }
1644
1645 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1646 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1647 *OVERLAPS_B are initialized to the functions that describe the
1648 relation between the elements accessed twice by CHREC_A and
1649 CHREC_B. For k >= 0, the following property is verified:
1650
1651 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1652
1653 static void
1654 analyze_siv_subscript_cst_affine (tree chrec_a,
1655 tree chrec_b,
1656 conflict_function **overlaps_a,
1657 conflict_function **overlaps_b,
1658 tree *last_conflicts)
1659 {
1660 bool value0, value1, value2;
1661 tree type, difference, tmp;
1662
1663 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1664 chrec_a = chrec_convert (type, chrec_a, NULL);
1665 chrec_b = chrec_convert (type, chrec_b, NULL);
1666 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1667
1668 if (!chrec_is_positive (initial_condition (difference), &value0))
1669 {
1670 if (dump_file && (dump_flags & TDF_DETAILS))
1671 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1672
1673 dependence_stats.num_siv_unimplemented++;
1674 *overlaps_a = conflict_fn_not_known ();
1675 *overlaps_b = conflict_fn_not_known ();
1676 *last_conflicts = chrec_dont_know;
1677 return;
1678 }
1679 else
1680 {
1681 if (value0 == false)
1682 {
1683 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1684 {
1685 if (dump_file && (dump_flags & TDF_DETAILS))
1686 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1687
1688 *overlaps_a = conflict_fn_not_known ();
1689 *overlaps_b = conflict_fn_not_known ();
1690 *last_conflicts = chrec_dont_know;
1691 dependence_stats.num_siv_unimplemented++;
1692 return;
1693 }
1694 else
1695 {
1696 if (value1 == true)
1697 {
1698 /* Example:
1699 chrec_a = 12
1700 chrec_b = {10, +, 1}
1701 */
1702
1703 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1704 {
1705 HOST_WIDE_INT numiter;
1706 struct loop *loop = get_chrec_loop (chrec_b);
1707
1708 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1709 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1710 fold_build1 (ABS_EXPR, type, difference),
1711 CHREC_RIGHT (chrec_b));
1712 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1713 *last_conflicts = integer_one_node;
1714
1715
1716 /* Perform weak-zero siv test to see if overlap is
1717 outside the loop bounds. */
1718 numiter = max_stmt_executions_int (loop, true);
1719
1720 if (numiter >= 0
1721 && compare_tree_int (tmp, numiter) > 0)
1722 {
1723 free_conflict_function (*overlaps_a);
1724 free_conflict_function (*overlaps_b);
1725 *overlaps_a = conflict_fn_no_dependence ();
1726 *overlaps_b = conflict_fn_no_dependence ();
1727 *last_conflicts = integer_zero_node;
1728 dependence_stats.num_siv_independent++;
1729 return;
1730 }
1731 dependence_stats.num_siv_dependent++;
1732 return;
1733 }
1734
1735 /* When the step does not divide the difference, there are
1736 no overlaps. */
1737 else
1738 {
1739 *overlaps_a = conflict_fn_no_dependence ();
1740 *overlaps_b = conflict_fn_no_dependence ();
1741 *last_conflicts = integer_zero_node;
1742 dependence_stats.num_siv_independent++;
1743 return;
1744 }
1745 }
1746
1747 else
1748 {
1749 /* Example:
1750 chrec_a = 12
1751 chrec_b = {10, +, -1}
1752
1753 In this case, chrec_a will not overlap with chrec_b. */
1754 *overlaps_a = conflict_fn_no_dependence ();
1755 *overlaps_b = conflict_fn_no_dependence ();
1756 *last_conflicts = integer_zero_node;
1757 dependence_stats.num_siv_independent++;
1758 return;
1759 }
1760 }
1761 }
1762 else
1763 {
1764 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1765 {
1766 if (dump_file && (dump_flags & TDF_DETAILS))
1767 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1768
1769 *overlaps_a = conflict_fn_not_known ();
1770 *overlaps_b = conflict_fn_not_known ();
1771 *last_conflicts = chrec_dont_know;
1772 dependence_stats.num_siv_unimplemented++;
1773 return;
1774 }
1775 else
1776 {
1777 if (value2 == false)
1778 {
1779 /* Example:
1780 chrec_a = 3
1781 chrec_b = {10, +, -1}
1782 */
1783 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1784 {
1785 HOST_WIDE_INT numiter;
1786 struct loop *loop = get_chrec_loop (chrec_b);
1787
1788 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1789 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1790 CHREC_RIGHT (chrec_b));
1791 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1792 *last_conflicts = integer_one_node;
1793
1794 /* Perform weak-zero siv test to see if overlap is
1795 outside the loop bounds. */
1796 numiter = max_stmt_executions_int (loop, true);
1797
1798 if (numiter >= 0
1799 && compare_tree_int (tmp, numiter) > 0)
1800 {
1801 free_conflict_function (*overlaps_a);
1802 free_conflict_function (*overlaps_b);
1803 *overlaps_a = conflict_fn_no_dependence ();
1804 *overlaps_b = conflict_fn_no_dependence ();
1805 *last_conflicts = integer_zero_node;
1806 dependence_stats.num_siv_independent++;
1807 return;
1808 }
1809 dependence_stats.num_siv_dependent++;
1810 return;
1811 }
1812
1813 /* When the step does not divide the difference, there
1814 are no overlaps. */
1815 else
1816 {
1817 *overlaps_a = conflict_fn_no_dependence ();
1818 *overlaps_b = conflict_fn_no_dependence ();
1819 *last_conflicts = integer_zero_node;
1820 dependence_stats.num_siv_independent++;
1821 return;
1822 }
1823 }
1824 else
1825 {
1826 /* Example:
1827 chrec_a = 3
1828 chrec_b = {4, +, 1}
1829
1830 In this case, chrec_a will not overlap with chrec_b. */
1831 *overlaps_a = conflict_fn_no_dependence ();
1832 *overlaps_b = conflict_fn_no_dependence ();
1833 *last_conflicts = integer_zero_node;
1834 dependence_stats.num_siv_independent++;
1835 return;
1836 }
1837 }
1838 }
1839 }
1840 }
1841
1842 /* Helper recursive function for initializing the matrix A. Returns
1843 the initial value of CHREC. */
1844
1845 static tree
1846 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1847 {
1848 gcc_assert (chrec);
1849
1850 switch (TREE_CODE (chrec))
1851 {
1852 case POLYNOMIAL_CHREC:
1853 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1854
1855 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1856 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1857
1858 case PLUS_EXPR:
1859 case MULT_EXPR:
1860 case MINUS_EXPR:
1861 {
1862 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1863 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1864
1865 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1866 }
1867
1868 case NOP_EXPR:
1869 {
1870 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1871 return chrec_convert (chrec_type (chrec), op, NULL);
1872 }
1873
1874 case BIT_NOT_EXPR:
1875 {
1876 /* Handle ~X as -1 - X. */
1877 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1878 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1879 build_int_cst (TREE_TYPE (chrec), -1), op);
1880 }
1881
1882 case INTEGER_CST:
1883 return chrec;
1884
1885 default:
1886 gcc_unreachable ();
1887 return NULL_TREE;
1888 }
1889 }
1890
1891 #define FLOOR_DIV(x,y) ((x) / (y))
1892
1893 /* Solves the special case of the Diophantine equation:
1894 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1895
1896 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1897 number of iterations that loops X and Y run. The overlaps will be
1898 constructed as evolutions in dimension DIM. */
1899
1900 static void
1901 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1902 affine_fn *overlaps_a,
1903 affine_fn *overlaps_b,
1904 tree *last_conflicts, int dim)
1905 {
1906 if (((step_a > 0 && step_b > 0)
1907 || (step_a < 0 && step_b < 0)))
1908 {
1909 int step_overlaps_a, step_overlaps_b;
1910 int gcd_steps_a_b, last_conflict, tau2;
1911
1912 gcd_steps_a_b = gcd (step_a, step_b);
1913 step_overlaps_a = step_b / gcd_steps_a_b;
1914 step_overlaps_b = step_a / gcd_steps_a_b;
1915
1916 if (niter > 0)
1917 {
1918 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1919 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1920 last_conflict = tau2;
1921 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1922 }
1923 else
1924 *last_conflicts = chrec_dont_know;
1925
1926 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1927 build_int_cst (NULL_TREE,
1928 step_overlaps_a));
1929 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1930 build_int_cst (NULL_TREE,
1931 step_overlaps_b));
1932 }
1933
1934 else
1935 {
1936 *overlaps_a = affine_fn_cst (integer_zero_node);
1937 *overlaps_b = affine_fn_cst (integer_zero_node);
1938 *last_conflicts = integer_zero_node;
1939 }
1940 }
1941
1942 /* Solves the special case of a Diophantine equation where CHREC_A is
1943 an affine bivariate function, and CHREC_B is an affine univariate
1944 function. For example,
1945
1946 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1947
1948 has the following overlapping functions:
1949
1950 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1951 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1952 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1953
1954 FORNOW: This is a specialized implementation for a case occurring in
1955 a common benchmark. Implement the general algorithm. */
1956
1957 static void
1958 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1959 conflict_function **overlaps_a,
1960 conflict_function **overlaps_b,
1961 tree *last_conflicts)
1962 {
1963 bool xz_p, yz_p, xyz_p;
1964 int step_x, step_y, step_z;
1965 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1966 affine_fn overlaps_a_xz, overlaps_b_xz;
1967 affine_fn overlaps_a_yz, overlaps_b_yz;
1968 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1969 affine_fn ova1, ova2, ovb;
1970 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1971
1972 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1973 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1974 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1975
1976 niter_x =
1977 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
1978 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
1979 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
1980
1981 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1982 {
1983 if (dump_file && (dump_flags & TDF_DETAILS))
1984 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1985
1986 *overlaps_a = conflict_fn_not_known ();
1987 *overlaps_b = conflict_fn_not_known ();
1988 *last_conflicts = chrec_dont_know;
1989 return;
1990 }
1991
1992 niter = MIN (niter_x, niter_z);
1993 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1994 &overlaps_a_xz,
1995 &overlaps_b_xz,
1996 &last_conflicts_xz, 1);
1997 niter = MIN (niter_y, niter_z);
1998 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1999 &overlaps_a_yz,
2000 &overlaps_b_yz,
2001 &last_conflicts_yz, 2);
2002 niter = MIN (niter_x, niter_z);
2003 niter = MIN (niter_y, niter);
2004 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2005 &overlaps_a_xyz,
2006 &overlaps_b_xyz,
2007 &last_conflicts_xyz, 3);
2008
2009 xz_p = !integer_zerop (last_conflicts_xz);
2010 yz_p = !integer_zerop (last_conflicts_yz);
2011 xyz_p = !integer_zerop (last_conflicts_xyz);
2012
2013 if (xz_p || yz_p || xyz_p)
2014 {
2015 ova1 = affine_fn_cst (integer_zero_node);
2016 ova2 = affine_fn_cst (integer_zero_node);
2017 ovb = affine_fn_cst (integer_zero_node);
2018 if (xz_p)
2019 {
2020 affine_fn t0 = ova1;
2021 affine_fn t2 = ovb;
2022
2023 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2024 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2025 affine_fn_free (t0);
2026 affine_fn_free (t2);
2027 *last_conflicts = last_conflicts_xz;
2028 }
2029 if (yz_p)
2030 {
2031 affine_fn t0 = ova2;
2032 affine_fn t2 = ovb;
2033
2034 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2035 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2036 affine_fn_free (t0);
2037 affine_fn_free (t2);
2038 *last_conflicts = last_conflicts_yz;
2039 }
2040 if (xyz_p)
2041 {
2042 affine_fn t0 = ova1;
2043 affine_fn t2 = ova2;
2044 affine_fn t4 = ovb;
2045
2046 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2047 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2048 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2049 affine_fn_free (t0);
2050 affine_fn_free (t2);
2051 affine_fn_free (t4);
2052 *last_conflicts = last_conflicts_xyz;
2053 }
2054 *overlaps_a = conflict_fn (2, ova1, ova2);
2055 *overlaps_b = conflict_fn (1, ovb);
2056 }
2057 else
2058 {
2059 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2060 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2061 *last_conflicts = integer_zero_node;
2062 }
2063
2064 affine_fn_free (overlaps_a_xz);
2065 affine_fn_free (overlaps_b_xz);
2066 affine_fn_free (overlaps_a_yz);
2067 affine_fn_free (overlaps_b_yz);
2068 affine_fn_free (overlaps_a_xyz);
2069 affine_fn_free (overlaps_b_xyz);
2070 }
2071
2072 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2073
2074 static void
2075 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2076 int size)
2077 {
2078 memcpy (vec2, vec1, size * sizeof (*vec1));
2079 }
2080
2081 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2082
2083 static void
2084 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2085 int m, int n)
2086 {
2087 int i;
2088
2089 for (i = 0; i < m; i++)
2090 lambda_vector_copy (mat1[i], mat2[i], n);
2091 }
2092
2093 /* Store the N x N identity matrix in MAT. */
2094
2095 static void
2096 lambda_matrix_id (lambda_matrix mat, int size)
2097 {
2098 int i, j;
2099
2100 for (i = 0; i < size; i++)
2101 for (j = 0; j < size; j++)
2102 mat[i][j] = (i == j) ? 1 : 0;
2103 }
2104
2105 /* Return the first nonzero element of vector VEC1 between START and N.
2106 We must have START <= N. Returns N if VEC1 is the zero vector. */
2107
2108 static int
2109 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2110 {
2111 int j = start;
2112 while (j < n && vec1[j] == 0)
2113 j++;
2114 return j;
2115 }
2116
2117 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2118 R2 = R2 + CONST1 * R1. */
2119
2120 static void
2121 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2122 {
2123 int i;
2124
2125 if (const1 == 0)
2126 return;
2127
2128 for (i = 0; i < n; i++)
2129 mat[r2][i] += const1 * mat[r1][i];
2130 }
2131
2132 /* Swap rows R1 and R2 in matrix MAT. */
2133
2134 static void
2135 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2136 {
2137 lambda_vector row;
2138
2139 row = mat[r1];
2140 mat[r1] = mat[r2];
2141 mat[r2] = row;
2142 }
2143
2144 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2145 and store the result in VEC2. */
2146
2147 static void
2148 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2149 int size, int const1)
2150 {
2151 int i;
2152
2153 if (const1 == 0)
2154 lambda_vector_clear (vec2, size);
2155 else
2156 for (i = 0; i < size; i++)
2157 vec2[i] = const1 * vec1[i];
2158 }
2159
2160 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2161
2162 static void
2163 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2164 int size)
2165 {
2166 lambda_vector_mult_const (vec1, vec2, size, -1);
2167 }
2168
2169 /* Negate row R1 of matrix MAT which has N columns. */
2170
2171 static void
2172 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2173 {
2174 lambda_vector_negate (mat[r1], mat[r1], n);
2175 }
2176
2177 /* Return true if two vectors are equal. */
2178
2179 static bool
2180 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2181 {
2182 int i;
2183 for (i = 0; i < size; i++)
2184 if (vec1[i] != vec2[i])
2185 return false;
2186 return true;
2187 }
2188
2189 /* Given an M x N integer matrix A, this function determines an M x
2190 M unimodular matrix U, and an M x N echelon matrix S such that
2191 "U.A = S". This decomposition is also known as "right Hermite".
2192
2193 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2194 Restructuring Compilers" Utpal Banerjee. */
2195
2196 static void
2197 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2198 lambda_matrix S, lambda_matrix U)
2199 {
2200 int i, j, i0 = 0;
2201
2202 lambda_matrix_copy (A, S, m, n);
2203 lambda_matrix_id (U, m);
2204
2205 for (j = 0; j < n; j++)
2206 {
2207 if (lambda_vector_first_nz (S[j], m, i0) < m)
2208 {
2209 ++i0;
2210 for (i = m - 1; i >= i0; i--)
2211 {
2212 while (S[i][j] != 0)
2213 {
2214 int sigma, factor, a, b;
2215
2216 a = S[i-1][j];
2217 b = S[i][j];
2218 sigma = (a * b < 0) ? -1: 1;
2219 a = abs (a);
2220 b = abs (b);
2221 factor = sigma * (a / b);
2222
2223 lambda_matrix_row_add (S, n, i, i-1, -factor);
2224 lambda_matrix_row_exchange (S, i, i-1);
2225
2226 lambda_matrix_row_add (U, m, i, i-1, -factor);
2227 lambda_matrix_row_exchange (U, i, i-1);
2228 }
2229 }
2230 }
2231 }
2232 }
2233
2234 /* Determines the overlapping elements due to accesses CHREC_A and
2235 CHREC_B, that are affine functions. This function cannot handle
2236 symbolic evolution functions, ie. when initial conditions are
2237 parameters, because it uses lambda matrices of integers. */
2238
2239 static void
2240 analyze_subscript_affine_affine (tree chrec_a,
2241 tree chrec_b,
2242 conflict_function **overlaps_a,
2243 conflict_function **overlaps_b,
2244 tree *last_conflicts)
2245 {
2246 unsigned nb_vars_a, nb_vars_b, dim;
2247 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2248 lambda_matrix A, U, S;
2249 struct obstack scratch_obstack;
2250
2251 if (eq_evolutions_p (chrec_a, chrec_b))
2252 {
2253 /* The accessed index overlaps for each iteration in the
2254 loop. */
2255 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2256 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2257 *last_conflicts = chrec_dont_know;
2258 return;
2259 }
2260 if (dump_file && (dump_flags & TDF_DETAILS))
2261 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2262
2263 /* For determining the initial intersection, we have to solve a
2264 Diophantine equation. This is the most time consuming part.
2265
2266 For answering to the question: "Is there a dependence?" we have
2267 to prove that there exists a solution to the Diophantine
2268 equation, and that the solution is in the iteration domain,
2269 i.e. the solution is positive or zero, and that the solution
2270 happens before the upper bound loop.nb_iterations. Otherwise
2271 there is no dependence. This function outputs a description of
2272 the iterations that hold the intersections. */
2273
2274 nb_vars_a = nb_vars_in_chrec (chrec_a);
2275 nb_vars_b = nb_vars_in_chrec (chrec_b);
2276
2277 gcc_obstack_init (&scratch_obstack);
2278
2279 dim = nb_vars_a + nb_vars_b;
2280 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2281 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2282 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2283
2284 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2285 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2286 gamma = init_b - init_a;
2287
2288 /* Don't do all the hard work of solving the Diophantine equation
2289 when we already know the solution: for example,
2290 | {3, +, 1}_1
2291 | {3, +, 4}_2
2292 | gamma = 3 - 3 = 0.
2293 Then the first overlap occurs during the first iterations:
2294 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2295 */
2296 if (gamma == 0)
2297 {
2298 if (nb_vars_a == 1 && nb_vars_b == 1)
2299 {
2300 HOST_WIDE_INT step_a, step_b;
2301 HOST_WIDE_INT niter, niter_a, niter_b;
2302 affine_fn ova, ovb;
2303
2304 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2305 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2306 niter = MIN (niter_a, niter_b);
2307 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2308 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2309
2310 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2311 &ova, &ovb,
2312 last_conflicts, 1);
2313 *overlaps_a = conflict_fn (1, ova);
2314 *overlaps_b = conflict_fn (1, ovb);
2315 }
2316
2317 else if (nb_vars_a == 2 && nb_vars_b == 1)
2318 compute_overlap_steps_for_affine_1_2
2319 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2320
2321 else if (nb_vars_a == 1 && nb_vars_b == 2)
2322 compute_overlap_steps_for_affine_1_2
2323 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2324
2325 else
2326 {
2327 if (dump_file && (dump_flags & TDF_DETAILS))
2328 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2329 *overlaps_a = conflict_fn_not_known ();
2330 *overlaps_b = conflict_fn_not_known ();
2331 *last_conflicts = chrec_dont_know;
2332 }
2333 goto end_analyze_subs_aa;
2334 }
2335
2336 /* U.A = S */
2337 lambda_matrix_right_hermite (A, dim, 1, S, U);
2338
2339 if (S[0][0] < 0)
2340 {
2341 S[0][0] *= -1;
2342 lambda_matrix_row_negate (U, dim, 0);
2343 }
2344 gcd_alpha_beta = S[0][0];
2345
2346 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2347 but that is a quite strange case. Instead of ICEing, answer
2348 don't know. */
2349 if (gcd_alpha_beta == 0)
2350 {
2351 *overlaps_a = conflict_fn_not_known ();
2352 *overlaps_b = conflict_fn_not_known ();
2353 *last_conflicts = chrec_dont_know;
2354 goto end_analyze_subs_aa;
2355 }
2356
2357 /* The classic "gcd-test". */
2358 if (!int_divides_p (gcd_alpha_beta, gamma))
2359 {
2360 /* The "gcd-test" has determined that there is no integer
2361 solution, i.e. there is no dependence. */
2362 *overlaps_a = conflict_fn_no_dependence ();
2363 *overlaps_b = conflict_fn_no_dependence ();
2364 *last_conflicts = integer_zero_node;
2365 }
2366
2367 /* Both access functions are univariate. This includes SIV and MIV cases. */
2368 else if (nb_vars_a == 1 && nb_vars_b == 1)
2369 {
2370 /* Both functions should have the same evolution sign. */
2371 if (((A[0][0] > 0 && -A[1][0] > 0)
2372 || (A[0][0] < 0 && -A[1][0] < 0)))
2373 {
2374 /* The solutions are given by:
2375 |
2376 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2377 | [u21 u22] [y0]
2378
2379 For a given integer t. Using the following variables,
2380
2381 | i0 = u11 * gamma / gcd_alpha_beta
2382 | j0 = u12 * gamma / gcd_alpha_beta
2383 | i1 = u21
2384 | j1 = u22
2385
2386 the solutions are:
2387
2388 | x0 = i0 + i1 * t,
2389 | y0 = j0 + j1 * t. */
2390 HOST_WIDE_INT i0, j0, i1, j1;
2391
2392 i0 = U[0][0] * gamma / gcd_alpha_beta;
2393 j0 = U[0][1] * gamma / gcd_alpha_beta;
2394 i1 = U[1][0];
2395 j1 = U[1][1];
2396
2397 if ((i1 == 0 && i0 < 0)
2398 || (j1 == 0 && j0 < 0))
2399 {
2400 /* There is no solution.
2401 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2402 falls in here, but for the moment we don't look at the
2403 upper bound of the iteration domain. */
2404 *overlaps_a = conflict_fn_no_dependence ();
2405 *overlaps_b = conflict_fn_no_dependence ();
2406 *last_conflicts = integer_zero_node;
2407 goto end_analyze_subs_aa;
2408 }
2409
2410 if (i1 > 0 && j1 > 0)
2411 {
2412 HOST_WIDE_INT niter_a = max_stmt_executions_int
2413 (get_chrec_loop (chrec_a), true);
2414 HOST_WIDE_INT niter_b = max_stmt_executions_int
2415 (get_chrec_loop (chrec_b), true);
2416 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2417
2418 /* (X0, Y0) is a solution of the Diophantine equation:
2419 "chrec_a (X0) = chrec_b (Y0)". */
2420 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2421 CEIL (-j0, j1));
2422 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2423 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2424
2425 /* (X1, Y1) is the smallest positive solution of the eq
2426 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2427 first conflict occurs. */
2428 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2429 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2430 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2431
2432 if (niter > 0)
2433 {
2434 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2435 FLOOR_DIV (niter - j0, j1));
2436 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2437
2438 /* If the overlap occurs outside of the bounds of the
2439 loop, there is no dependence. */
2440 if (x1 >= niter || y1 >= niter)
2441 {
2442 *overlaps_a = conflict_fn_no_dependence ();
2443 *overlaps_b = conflict_fn_no_dependence ();
2444 *last_conflicts = integer_zero_node;
2445 goto end_analyze_subs_aa;
2446 }
2447 else
2448 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2449 }
2450 else
2451 *last_conflicts = chrec_dont_know;
2452
2453 *overlaps_a
2454 = conflict_fn (1,
2455 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2456 1,
2457 build_int_cst (NULL_TREE, i1)));
2458 *overlaps_b
2459 = conflict_fn (1,
2460 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2461 1,
2462 build_int_cst (NULL_TREE, j1)));
2463 }
2464 else
2465 {
2466 /* FIXME: For the moment, the upper bound of the
2467 iteration domain for i and j is not checked. */
2468 if (dump_file && (dump_flags & TDF_DETAILS))
2469 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2470 *overlaps_a = conflict_fn_not_known ();
2471 *overlaps_b = conflict_fn_not_known ();
2472 *last_conflicts = chrec_dont_know;
2473 }
2474 }
2475 else
2476 {
2477 if (dump_file && (dump_flags & TDF_DETAILS))
2478 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2479 *overlaps_a = conflict_fn_not_known ();
2480 *overlaps_b = conflict_fn_not_known ();
2481 *last_conflicts = chrec_dont_know;
2482 }
2483 }
2484 else
2485 {
2486 if (dump_file && (dump_flags & TDF_DETAILS))
2487 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2488 *overlaps_a = conflict_fn_not_known ();
2489 *overlaps_b = conflict_fn_not_known ();
2490 *last_conflicts = chrec_dont_know;
2491 }
2492
2493 end_analyze_subs_aa:
2494 obstack_free (&scratch_obstack, NULL);
2495 if (dump_file && (dump_flags & TDF_DETAILS))
2496 {
2497 fprintf (dump_file, " (overlaps_a = ");
2498 dump_conflict_function (dump_file, *overlaps_a);
2499 fprintf (dump_file, ")\n (overlaps_b = ");
2500 dump_conflict_function (dump_file, *overlaps_b);
2501 fprintf (dump_file, ")\n");
2502 fprintf (dump_file, ")\n");
2503 }
2504 }
2505
2506 /* Returns true when analyze_subscript_affine_affine can be used for
2507 determining the dependence relation between chrec_a and chrec_b,
2508 that contain symbols. This function modifies chrec_a and chrec_b
2509 such that the analysis result is the same, and such that they don't
2510 contain symbols, and then can safely be passed to the analyzer.
2511
2512 Example: The analysis of the following tuples of evolutions produce
2513 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2514 vs. {0, +, 1}_1
2515
2516 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2517 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2518 */
2519
2520 static bool
2521 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2522 {
2523 tree diff, type, left_a, left_b, right_b;
2524
2525 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2526 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2527 /* FIXME: For the moment not handled. Might be refined later. */
2528 return false;
2529
2530 type = chrec_type (*chrec_a);
2531 left_a = CHREC_LEFT (*chrec_a);
2532 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2533 diff = chrec_fold_minus (type, left_a, left_b);
2534
2535 if (!evolution_function_is_constant_p (diff))
2536 return false;
2537
2538 if (dump_file && (dump_flags & TDF_DETAILS))
2539 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2540
2541 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2542 diff, CHREC_RIGHT (*chrec_a));
2543 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2544 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2545 build_int_cst (type, 0),
2546 right_b);
2547 return true;
2548 }
2549
2550 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2551 *OVERLAPS_B are initialized to the functions that describe the
2552 relation between the elements accessed twice by CHREC_A and
2553 CHREC_B. For k >= 0, the following property is verified:
2554
2555 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2556
2557 static void
2558 analyze_siv_subscript (tree chrec_a,
2559 tree chrec_b,
2560 conflict_function **overlaps_a,
2561 conflict_function **overlaps_b,
2562 tree *last_conflicts,
2563 int loop_nest_num)
2564 {
2565 dependence_stats.num_siv++;
2566
2567 if (dump_file && (dump_flags & TDF_DETAILS))
2568 fprintf (dump_file, "(analyze_siv_subscript \n");
2569
2570 if (evolution_function_is_constant_p (chrec_a)
2571 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2572 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2573 overlaps_a, overlaps_b, last_conflicts);
2574
2575 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2576 && evolution_function_is_constant_p (chrec_b))
2577 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2578 overlaps_b, overlaps_a, last_conflicts);
2579
2580 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2581 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2582 {
2583 if (!chrec_contains_symbols (chrec_a)
2584 && !chrec_contains_symbols (chrec_b))
2585 {
2586 analyze_subscript_affine_affine (chrec_a, chrec_b,
2587 overlaps_a, overlaps_b,
2588 last_conflicts);
2589
2590 if (CF_NOT_KNOWN_P (*overlaps_a)
2591 || CF_NOT_KNOWN_P (*overlaps_b))
2592 dependence_stats.num_siv_unimplemented++;
2593 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2594 || CF_NO_DEPENDENCE_P (*overlaps_b))
2595 dependence_stats.num_siv_independent++;
2596 else
2597 dependence_stats.num_siv_dependent++;
2598 }
2599 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2600 &chrec_b))
2601 {
2602 analyze_subscript_affine_affine (chrec_a, chrec_b,
2603 overlaps_a, overlaps_b,
2604 last_conflicts);
2605
2606 if (CF_NOT_KNOWN_P (*overlaps_a)
2607 || CF_NOT_KNOWN_P (*overlaps_b))
2608 dependence_stats.num_siv_unimplemented++;
2609 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2610 || CF_NO_DEPENDENCE_P (*overlaps_b))
2611 dependence_stats.num_siv_independent++;
2612 else
2613 dependence_stats.num_siv_dependent++;
2614 }
2615 else
2616 goto siv_subscript_dontknow;
2617 }
2618
2619 else
2620 {
2621 siv_subscript_dontknow:;
2622 if (dump_file && (dump_flags & TDF_DETAILS))
2623 fprintf (dump_file, "siv test failed: unimplemented.\n");
2624 *overlaps_a = conflict_fn_not_known ();
2625 *overlaps_b = conflict_fn_not_known ();
2626 *last_conflicts = chrec_dont_know;
2627 dependence_stats.num_siv_unimplemented++;
2628 }
2629
2630 if (dump_file && (dump_flags & TDF_DETAILS))
2631 fprintf (dump_file, ")\n");
2632 }
2633
2634 /* Returns false if we can prove that the greatest common divisor of the steps
2635 of CHREC does not divide CST, false otherwise. */
2636
2637 static bool
2638 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2639 {
2640 HOST_WIDE_INT cd = 0, val;
2641 tree step;
2642
2643 if (!host_integerp (cst, 0))
2644 return true;
2645 val = tree_low_cst (cst, 0);
2646
2647 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2648 {
2649 step = CHREC_RIGHT (chrec);
2650 if (!host_integerp (step, 0))
2651 return true;
2652 cd = gcd (cd, tree_low_cst (step, 0));
2653 chrec = CHREC_LEFT (chrec);
2654 }
2655
2656 return val % cd == 0;
2657 }
2658
2659 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2660 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2661 functions that describe the relation between the elements accessed
2662 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2663 is verified:
2664
2665 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2666
2667 static void
2668 analyze_miv_subscript (tree chrec_a,
2669 tree chrec_b,
2670 conflict_function **overlaps_a,
2671 conflict_function **overlaps_b,
2672 tree *last_conflicts,
2673 struct loop *loop_nest)
2674 {
2675 tree type, difference;
2676
2677 dependence_stats.num_miv++;
2678 if (dump_file && (dump_flags & TDF_DETAILS))
2679 fprintf (dump_file, "(analyze_miv_subscript \n");
2680
2681 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2682 chrec_a = chrec_convert (type, chrec_a, NULL);
2683 chrec_b = chrec_convert (type, chrec_b, NULL);
2684 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2685
2686 if (eq_evolutions_p (chrec_a, chrec_b))
2687 {
2688 /* Access functions are the same: all the elements are accessed
2689 in the same order. */
2690 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2691 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2692 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2693 dependence_stats.num_miv_dependent++;
2694 }
2695
2696 else if (evolution_function_is_constant_p (difference)
2697 /* For the moment, the following is verified:
2698 evolution_function_is_affine_multivariate_p (chrec_a,
2699 loop_nest->num) */
2700 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2701 {
2702 /* testsuite/.../ssa-chrec-33.c
2703 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2704
2705 The difference is 1, and all the evolution steps are multiples
2706 of 2, consequently there are no overlapping elements. */
2707 *overlaps_a = conflict_fn_no_dependence ();
2708 *overlaps_b = conflict_fn_no_dependence ();
2709 *last_conflicts = integer_zero_node;
2710 dependence_stats.num_miv_independent++;
2711 }
2712
2713 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2714 && !chrec_contains_symbols (chrec_a)
2715 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2716 && !chrec_contains_symbols (chrec_b))
2717 {
2718 /* testsuite/.../ssa-chrec-35.c
2719 {0, +, 1}_2 vs. {0, +, 1}_3
2720 the overlapping elements are respectively located at iterations:
2721 {0, +, 1}_x and {0, +, 1}_x,
2722 in other words, we have the equality:
2723 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2724
2725 Other examples:
2726 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2727 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2728
2729 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2730 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2731 */
2732 analyze_subscript_affine_affine (chrec_a, chrec_b,
2733 overlaps_a, overlaps_b, last_conflicts);
2734
2735 if (CF_NOT_KNOWN_P (*overlaps_a)
2736 || CF_NOT_KNOWN_P (*overlaps_b))
2737 dependence_stats.num_miv_unimplemented++;
2738 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2739 || CF_NO_DEPENDENCE_P (*overlaps_b))
2740 dependence_stats.num_miv_independent++;
2741 else
2742 dependence_stats.num_miv_dependent++;
2743 }
2744
2745 else
2746 {
2747 /* When the analysis is too difficult, answer "don't know". */
2748 if (dump_file && (dump_flags & TDF_DETAILS))
2749 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2750
2751 *overlaps_a = conflict_fn_not_known ();
2752 *overlaps_b = conflict_fn_not_known ();
2753 *last_conflicts = chrec_dont_know;
2754 dependence_stats.num_miv_unimplemented++;
2755 }
2756
2757 if (dump_file && (dump_flags & TDF_DETAILS))
2758 fprintf (dump_file, ")\n");
2759 }
2760
2761 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2762 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2763 OVERLAP_ITERATIONS_B are initialized with two functions that
2764 describe the iterations that contain conflicting elements.
2765
2766 Remark: For an integer k >= 0, the following equality is true:
2767
2768 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2769 */
2770
2771 static void
2772 analyze_overlapping_iterations (tree chrec_a,
2773 tree chrec_b,
2774 conflict_function **overlap_iterations_a,
2775 conflict_function **overlap_iterations_b,
2776 tree *last_conflicts, struct loop *loop_nest)
2777 {
2778 unsigned int lnn = loop_nest->num;
2779
2780 dependence_stats.num_subscript_tests++;
2781
2782 if (dump_file && (dump_flags & TDF_DETAILS))
2783 {
2784 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2785 fprintf (dump_file, " (chrec_a = ");
2786 print_generic_expr (dump_file, chrec_a, 0);
2787 fprintf (dump_file, ")\n (chrec_b = ");
2788 print_generic_expr (dump_file, chrec_b, 0);
2789 fprintf (dump_file, ")\n");
2790 }
2791
2792 if (chrec_a == NULL_TREE
2793 || chrec_b == NULL_TREE
2794 || chrec_contains_undetermined (chrec_a)
2795 || chrec_contains_undetermined (chrec_b))
2796 {
2797 dependence_stats.num_subscript_undetermined++;
2798
2799 *overlap_iterations_a = conflict_fn_not_known ();
2800 *overlap_iterations_b = conflict_fn_not_known ();
2801 }
2802
2803 /* If they are the same chrec, and are affine, they overlap
2804 on every iteration. */
2805 else if (eq_evolutions_p (chrec_a, chrec_b)
2806 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2807 || operand_equal_p (chrec_a, chrec_b, 0)))
2808 {
2809 dependence_stats.num_same_subscript_function++;
2810 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2811 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2812 *last_conflicts = chrec_dont_know;
2813 }
2814
2815 /* If they aren't the same, and aren't affine, we can't do anything
2816 yet. */
2817 else if ((chrec_contains_symbols (chrec_a)
2818 || chrec_contains_symbols (chrec_b))
2819 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2820 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2821 {
2822 dependence_stats.num_subscript_undetermined++;
2823 *overlap_iterations_a = conflict_fn_not_known ();
2824 *overlap_iterations_b = conflict_fn_not_known ();
2825 }
2826
2827 else if (ziv_subscript_p (chrec_a, chrec_b))
2828 analyze_ziv_subscript (chrec_a, chrec_b,
2829 overlap_iterations_a, overlap_iterations_b,
2830 last_conflicts);
2831
2832 else if (siv_subscript_p (chrec_a, chrec_b))
2833 analyze_siv_subscript (chrec_a, chrec_b,
2834 overlap_iterations_a, overlap_iterations_b,
2835 last_conflicts, lnn);
2836
2837 else
2838 analyze_miv_subscript (chrec_a, chrec_b,
2839 overlap_iterations_a, overlap_iterations_b,
2840 last_conflicts, loop_nest);
2841
2842 if (dump_file && (dump_flags & TDF_DETAILS))
2843 {
2844 fprintf (dump_file, " (overlap_iterations_a = ");
2845 dump_conflict_function (dump_file, *overlap_iterations_a);
2846 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2847 dump_conflict_function (dump_file, *overlap_iterations_b);
2848 fprintf (dump_file, ")\n");
2849 fprintf (dump_file, ")\n");
2850 }
2851 }
2852
2853 /* Helper function for uniquely inserting distance vectors. */
2854
2855 static void
2856 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2857 {
2858 unsigned i;
2859 lambda_vector v;
2860
2861 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2862 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2863 return;
2864
2865 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2866 }
2867
2868 /* Helper function for uniquely inserting direction vectors. */
2869
2870 static void
2871 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2872 {
2873 unsigned i;
2874 lambda_vector v;
2875
2876 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2877 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2878 return;
2879
2880 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2881 }
2882
2883 /* Add a distance of 1 on all the loops outer than INDEX. If we
2884 haven't yet determined a distance for this outer loop, push a new
2885 distance vector composed of the previous distance, and a distance
2886 of 1 for this outer loop. Example:
2887
2888 | loop_1
2889 | loop_2
2890 | A[10]
2891 | endloop_2
2892 | endloop_1
2893
2894 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2895 save (0, 1), then we have to save (1, 0). */
2896
2897 static void
2898 add_outer_distances (struct data_dependence_relation *ddr,
2899 lambda_vector dist_v, int index)
2900 {
2901 /* For each outer loop where init_v is not set, the accesses are
2902 in dependence of distance 1 in the loop. */
2903 while (--index >= 0)
2904 {
2905 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2906 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2907 save_v[index] = 1;
2908 save_dist_v (ddr, save_v);
2909 }
2910 }
2911
2912 /* Return false when fail to represent the data dependence as a
2913 distance vector. INIT_B is set to true when a component has been
2914 added to the distance vector DIST_V. INDEX_CARRY is then set to
2915 the index in DIST_V that carries the dependence. */
2916
2917 static bool
2918 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2919 struct data_reference *ddr_a,
2920 struct data_reference *ddr_b,
2921 lambda_vector dist_v, bool *init_b,
2922 int *index_carry)
2923 {
2924 unsigned i;
2925 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2926
2927 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2928 {
2929 tree access_fn_a, access_fn_b;
2930 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2931
2932 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2933 {
2934 non_affine_dependence_relation (ddr);
2935 return false;
2936 }
2937
2938 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2939 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2940
2941 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2942 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2943 {
2944 int dist, index;
2945 int var_a = CHREC_VARIABLE (access_fn_a);
2946 int var_b = CHREC_VARIABLE (access_fn_b);
2947
2948 if (var_a != var_b
2949 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2950 {
2951 non_affine_dependence_relation (ddr);
2952 return false;
2953 }
2954
2955 dist = int_cst_value (SUB_DISTANCE (subscript));
2956 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
2957 *index_carry = MIN (index, *index_carry);
2958
2959 /* This is the subscript coupling test. If we have already
2960 recorded a distance for this loop (a distance coming from
2961 another subscript), it should be the same. For example,
2962 in the following code, there is no dependence:
2963
2964 | loop i = 0, N, 1
2965 | T[i+1][i] = ...
2966 | ... = T[i][i]
2967 | endloop
2968 */
2969 if (init_v[index] != 0 && dist_v[index] != dist)
2970 {
2971 finalize_ddr_dependent (ddr, chrec_known);
2972 return false;
2973 }
2974
2975 dist_v[index] = dist;
2976 init_v[index] = 1;
2977 *init_b = true;
2978 }
2979 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2980 {
2981 /* This can be for example an affine vs. constant dependence
2982 (T[i] vs. T[3]) that is not an affine dependence and is
2983 not representable as a distance vector. */
2984 non_affine_dependence_relation (ddr);
2985 return false;
2986 }
2987 }
2988
2989 return true;
2990 }
2991
2992 /* Return true when the DDR contains only constant access functions. */
2993
2994 static bool
2995 constant_access_functions (const struct data_dependence_relation *ddr)
2996 {
2997 unsigned i;
2998
2999 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3000 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3001 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3002 return false;
3003
3004 return true;
3005 }
3006
3007 /* Helper function for the case where DDR_A and DDR_B are the same
3008 multivariate access function with a constant step. For an example
3009 see pr34635-1.c. */
3010
3011 static void
3012 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3013 {
3014 int x_1, x_2;
3015 tree c_1 = CHREC_LEFT (c_2);
3016 tree c_0 = CHREC_LEFT (c_1);
3017 lambda_vector dist_v;
3018 int v1, v2, cd;
3019
3020 /* Polynomials with more than 2 variables are not handled yet. When
3021 the evolution steps are parameters, it is not possible to
3022 represent the dependence using classical distance vectors. */
3023 if (TREE_CODE (c_0) != INTEGER_CST
3024 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3025 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3026 {
3027 DDR_AFFINE_P (ddr) = false;
3028 return;
3029 }
3030
3031 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3032 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3033
3034 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3035 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3036 v1 = int_cst_value (CHREC_RIGHT (c_1));
3037 v2 = int_cst_value (CHREC_RIGHT (c_2));
3038 cd = gcd (v1, v2);
3039 v1 /= cd;
3040 v2 /= cd;
3041
3042 if (v2 < 0)
3043 {
3044 v2 = -v2;
3045 v1 = -v1;
3046 }
3047
3048 dist_v[x_1] = v2;
3049 dist_v[x_2] = -v1;
3050 save_dist_v (ddr, dist_v);
3051
3052 add_outer_distances (ddr, dist_v, x_1);
3053 }
3054
3055 /* Helper function for the case where DDR_A and DDR_B are the same
3056 access functions. */
3057
3058 static void
3059 add_other_self_distances (struct data_dependence_relation *ddr)
3060 {
3061 lambda_vector dist_v;
3062 unsigned i;
3063 int index_carry = DDR_NB_LOOPS (ddr);
3064
3065 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3066 {
3067 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3068
3069 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3070 {
3071 if (!evolution_function_is_univariate_p (access_fun))
3072 {
3073 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3074 {
3075 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3076 return;
3077 }
3078
3079 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3080
3081 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3082 add_multivariate_self_dist (ddr, access_fun);
3083 else
3084 /* The evolution step is not constant: it varies in
3085 the outer loop, so this cannot be represented by a
3086 distance vector. For example in pr34635.c the
3087 evolution is {0, +, {0, +, 4}_1}_2. */
3088 DDR_AFFINE_P (ddr) = false;
3089
3090 return;
3091 }
3092
3093 index_carry = MIN (index_carry,
3094 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3095 DDR_LOOP_NEST (ddr)));
3096 }
3097 }
3098
3099 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3100 add_outer_distances (ddr, dist_v, index_carry);
3101 }
3102
3103 static void
3104 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3105 {
3106 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3107
3108 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3109 save_dist_v (ddr, dist_v);
3110 }
3111
3112 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3113 is the case for example when access functions are the same and
3114 equal to a constant, as in:
3115
3116 | loop_1
3117 | A[3] = ...
3118 | ... = A[3]
3119 | endloop_1
3120
3121 in which case the distance vectors are (0) and (1). */
3122
3123 static void
3124 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3125 {
3126 unsigned i, j;
3127
3128 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3129 {
3130 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3131 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3132 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3133
3134 for (j = 0; j < ca->n; j++)
3135 if (affine_function_zero_p (ca->fns[j]))
3136 {
3137 insert_innermost_unit_dist_vector (ddr);
3138 return;
3139 }
3140
3141 for (j = 0; j < cb->n; j++)
3142 if (affine_function_zero_p (cb->fns[j]))
3143 {
3144 insert_innermost_unit_dist_vector (ddr);
3145 return;
3146 }
3147 }
3148 }
3149
3150 /* Compute the classic per loop distance vector. DDR is the data
3151 dependence relation to build a vector from. Return false when fail
3152 to represent the data dependence as a distance vector. */
3153
3154 static bool
3155 build_classic_dist_vector (struct data_dependence_relation *ddr,
3156 struct loop *loop_nest)
3157 {
3158 bool init_b = false;
3159 int index_carry = DDR_NB_LOOPS (ddr);
3160 lambda_vector dist_v;
3161
3162 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3163 return false;
3164
3165 if (same_access_functions (ddr))
3166 {
3167 /* Save the 0 vector. */
3168 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3169 save_dist_v (ddr, dist_v);
3170
3171 if (constant_access_functions (ddr))
3172 add_distance_for_zero_overlaps (ddr);
3173
3174 if (DDR_NB_LOOPS (ddr) > 1)
3175 add_other_self_distances (ddr);
3176
3177 return true;
3178 }
3179
3180 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3181 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3182 dist_v, &init_b, &index_carry))
3183 return false;
3184
3185 /* Save the distance vector if we initialized one. */
3186 if (init_b)
3187 {
3188 /* Verify a basic constraint: classic distance vectors should
3189 always be lexicographically positive.
3190
3191 Data references are collected in the order of execution of
3192 the program, thus for the following loop
3193
3194 | for (i = 1; i < 100; i++)
3195 | for (j = 1; j < 100; j++)
3196 | {
3197 | t = T[j+1][i-1]; // A
3198 | T[j][i] = t + 2; // B
3199 | }
3200
3201 references are collected following the direction of the wind:
3202 A then B. The data dependence tests are performed also
3203 following this order, such that we're looking at the distance
3204 separating the elements accessed by A from the elements later
3205 accessed by B. But in this example, the distance returned by
3206 test_dep (A, B) is lexicographically negative (-1, 1), that
3207 means that the access A occurs later than B with respect to
3208 the outer loop, ie. we're actually looking upwind. In this
3209 case we solve test_dep (B, A) looking downwind to the
3210 lexicographically positive solution, that returns the
3211 distance vector (1, -1). */
3212 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3213 {
3214 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3215 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3216 loop_nest))
3217 return false;
3218 compute_subscript_distance (ddr);
3219 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3220 save_v, &init_b, &index_carry))
3221 return false;
3222 save_dist_v (ddr, save_v);
3223 DDR_REVERSED_P (ddr) = true;
3224
3225 /* In this case there is a dependence forward for all the
3226 outer loops:
3227
3228 | for (k = 1; k < 100; k++)
3229 | for (i = 1; i < 100; i++)
3230 | for (j = 1; j < 100; j++)
3231 | {
3232 | t = T[j+1][i-1]; // A
3233 | T[j][i] = t + 2; // B
3234 | }
3235
3236 the vectors are:
3237 (0, 1, -1)
3238 (1, 1, -1)
3239 (1, -1, 1)
3240 */
3241 if (DDR_NB_LOOPS (ddr) > 1)
3242 {
3243 add_outer_distances (ddr, save_v, index_carry);
3244 add_outer_distances (ddr, dist_v, index_carry);
3245 }
3246 }
3247 else
3248 {
3249 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3250 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3251
3252 if (DDR_NB_LOOPS (ddr) > 1)
3253 {
3254 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3255
3256 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3257 DDR_A (ddr), loop_nest))
3258 return false;
3259 compute_subscript_distance (ddr);
3260 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3261 opposite_v, &init_b,
3262 &index_carry))
3263 return false;
3264
3265 save_dist_v (ddr, save_v);
3266 add_outer_distances (ddr, dist_v, index_carry);
3267 add_outer_distances (ddr, opposite_v, index_carry);
3268 }
3269 else
3270 save_dist_v (ddr, save_v);
3271 }
3272 }
3273 else
3274 {
3275 /* There is a distance of 1 on all the outer loops: Example:
3276 there is a dependence of distance 1 on loop_1 for the array A.
3277
3278 | loop_1
3279 | A[5] = ...
3280 | endloop
3281 */
3282 add_outer_distances (ddr, dist_v,
3283 lambda_vector_first_nz (dist_v,
3284 DDR_NB_LOOPS (ddr), 0));
3285 }
3286
3287 if (dump_file && (dump_flags & TDF_DETAILS))
3288 {
3289 unsigned i;
3290
3291 fprintf (dump_file, "(build_classic_dist_vector\n");
3292 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3293 {
3294 fprintf (dump_file, " dist_vector = (");
3295 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3296 DDR_NB_LOOPS (ddr));
3297 fprintf (dump_file, " )\n");
3298 }
3299 fprintf (dump_file, ")\n");
3300 }
3301
3302 return true;
3303 }
3304
3305 /* Return the direction for a given distance.
3306 FIXME: Computing dir this way is suboptimal, since dir can catch
3307 cases that dist is unable to represent. */
3308
3309 static inline enum data_dependence_direction
3310 dir_from_dist (int dist)
3311 {
3312 if (dist > 0)
3313 return dir_positive;
3314 else if (dist < 0)
3315 return dir_negative;
3316 else
3317 return dir_equal;
3318 }
3319
3320 /* Compute the classic per loop direction vector. DDR is the data
3321 dependence relation to build a vector from. */
3322
3323 static void
3324 build_classic_dir_vector (struct data_dependence_relation *ddr)
3325 {
3326 unsigned i, j;
3327 lambda_vector dist_v;
3328
3329 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3330 {
3331 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3332
3333 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3334 dir_v[j] = dir_from_dist (dist_v[j]);
3335
3336 save_dir_v (ddr, dir_v);
3337 }
3338 }
3339
3340 /* Helper function. Returns true when there is a dependence between
3341 data references DRA and DRB. */
3342
3343 static bool
3344 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3345 struct data_reference *dra,
3346 struct data_reference *drb,
3347 struct loop *loop_nest)
3348 {
3349 unsigned int i;
3350 tree last_conflicts;
3351 struct subscript *subscript;
3352
3353 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3354 i++)
3355 {
3356 conflict_function *overlaps_a, *overlaps_b;
3357
3358 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3359 DR_ACCESS_FN (drb, i),
3360 &overlaps_a, &overlaps_b,
3361 &last_conflicts, loop_nest);
3362
3363 if (CF_NOT_KNOWN_P (overlaps_a)
3364 || CF_NOT_KNOWN_P (overlaps_b))
3365 {
3366 finalize_ddr_dependent (ddr, chrec_dont_know);
3367 dependence_stats.num_dependence_undetermined++;
3368 free_conflict_function (overlaps_a);
3369 free_conflict_function (overlaps_b);
3370 return false;
3371 }
3372
3373 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3374 || CF_NO_DEPENDENCE_P (overlaps_b))
3375 {
3376 finalize_ddr_dependent (ddr, chrec_known);
3377 dependence_stats.num_dependence_independent++;
3378 free_conflict_function (overlaps_a);
3379 free_conflict_function (overlaps_b);
3380 return false;
3381 }
3382
3383 else
3384 {
3385 if (SUB_CONFLICTS_IN_A (subscript))
3386 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3387 if (SUB_CONFLICTS_IN_B (subscript))
3388 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3389
3390 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3391 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3392 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3393 }
3394 }
3395
3396 return true;
3397 }
3398
3399 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3400
3401 static void
3402 subscript_dependence_tester (struct data_dependence_relation *ddr,
3403 struct loop *loop_nest)
3404 {
3405
3406 if (dump_file && (dump_flags & TDF_DETAILS))
3407 fprintf (dump_file, "(subscript_dependence_tester \n");
3408
3409 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3410 dependence_stats.num_dependence_dependent++;
3411
3412 compute_subscript_distance (ddr);
3413 if (build_classic_dist_vector (ddr, loop_nest))
3414 build_classic_dir_vector (ddr);
3415
3416 if (dump_file && (dump_flags & TDF_DETAILS))
3417 fprintf (dump_file, ")\n");
3418 }
3419
3420 /* Returns true when all the access functions of A are affine or
3421 constant with respect to LOOP_NEST. */
3422
3423 static bool
3424 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3425 const struct loop *loop_nest)
3426 {
3427 unsigned int i;
3428 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3429 tree t;
3430
3431 FOR_EACH_VEC_ELT (tree, fns, i, t)
3432 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3433 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3434 return false;
3435
3436 return true;
3437 }
3438
3439 /* Initializes an equation for an OMEGA problem using the information
3440 contained in the ACCESS_FUN. Returns true when the operation
3441 succeeded.
3442
3443 PB is the omega constraint system.
3444 EQ is the number of the equation to be initialized.
3445 OFFSET is used for shifting the variables names in the constraints:
3446 a constrain is composed of 2 * the number of variables surrounding
3447 dependence accesses. OFFSET is set either to 0 for the first n variables,
3448 then it is set to n.
3449 ACCESS_FUN is expected to be an affine chrec. */
3450
3451 static bool
3452 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3453 unsigned int offset, tree access_fun,
3454 struct data_dependence_relation *ddr)
3455 {
3456 switch (TREE_CODE (access_fun))
3457 {
3458 case POLYNOMIAL_CHREC:
3459 {
3460 tree left = CHREC_LEFT (access_fun);
3461 tree right = CHREC_RIGHT (access_fun);
3462 int var = CHREC_VARIABLE (access_fun);
3463 unsigned var_idx;
3464
3465 if (TREE_CODE (right) != INTEGER_CST)
3466 return false;
3467
3468 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3469 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3470
3471 /* Compute the innermost loop index. */
3472 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3473
3474 if (offset == 0)
3475 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3476 += int_cst_value (right);
3477
3478 switch (TREE_CODE (left))
3479 {
3480 case POLYNOMIAL_CHREC:
3481 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3482
3483 case INTEGER_CST:
3484 pb->eqs[eq].coef[0] += int_cst_value (left);
3485 return true;
3486
3487 default:
3488 return false;
3489 }
3490 }
3491
3492 case INTEGER_CST:
3493 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3494 return true;
3495
3496 default:
3497 return false;
3498 }
3499 }
3500
3501 /* As explained in the comments preceding init_omega_for_ddr, we have
3502 to set up a system for each loop level, setting outer loops
3503 variation to zero, and current loop variation to positive or zero.
3504 Save each lexico positive distance vector. */
3505
3506 static void
3507 omega_extract_distance_vectors (omega_pb pb,
3508 struct data_dependence_relation *ddr)
3509 {
3510 int eq, geq;
3511 unsigned i, j;
3512 struct loop *loopi, *loopj;
3513 enum omega_result res;
3514
3515 /* Set a new problem for each loop in the nest. The basis is the
3516 problem that we have initialized until now. On top of this we
3517 add new constraints. */
3518 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3519 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3520 {
3521 int dist = 0;
3522 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3523 DDR_NB_LOOPS (ddr));
3524
3525 omega_copy_problem (copy, pb);
3526
3527 /* For all the outer loops "loop_j", add "dj = 0". */
3528 for (j = 0;
3529 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3530 {
3531 eq = omega_add_zero_eq (copy, omega_black);
3532 copy->eqs[eq].coef[j + 1] = 1;
3533 }
3534
3535 /* For "loop_i", add "0 <= di". */
3536 geq = omega_add_zero_geq (copy, omega_black);
3537 copy->geqs[geq].coef[i + 1] = 1;
3538
3539 /* Reduce the constraint system, and test that the current
3540 problem is feasible. */
3541 res = omega_simplify_problem (copy);
3542 if (res == omega_false
3543 || res == omega_unknown
3544 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3545 goto next_problem;
3546
3547 for (eq = 0; eq < copy->num_subs; eq++)
3548 if (copy->subs[eq].key == (int) i + 1)
3549 {
3550 dist = copy->subs[eq].coef[0];
3551 goto found_dist;
3552 }
3553
3554 if (dist == 0)
3555 {
3556 /* Reinitialize problem... */
3557 omega_copy_problem (copy, pb);
3558 for (j = 0;
3559 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3560 {
3561 eq = omega_add_zero_eq (copy, omega_black);
3562 copy->eqs[eq].coef[j + 1] = 1;
3563 }
3564
3565 /* ..., but this time "di = 1". */
3566 eq = omega_add_zero_eq (copy, omega_black);
3567 copy->eqs[eq].coef[i + 1] = 1;
3568 copy->eqs[eq].coef[0] = -1;
3569
3570 res = omega_simplify_problem (copy);
3571 if (res == omega_false
3572 || res == omega_unknown
3573 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3574 goto next_problem;
3575
3576 for (eq = 0; eq < copy->num_subs; eq++)
3577 if (copy->subs[eq].key == (int) i + 1)
3578 {
3579 dist = copy->subs[eq].coef[0];
3580 goto found_dist;
3581 }
3582 }
3583
3584 found_dist:;
3585 /* Save the lexicographically positive distance vector. */
3586 if (dist >= 0)
3587 {
3588 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3589 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3590
3591 dist_v[i] = dist;
3592
3593 for (eq = 0; eq < copy->num_subs; eq++)
3594 if (copy->subs[eq].key > 0)
3595 {
3596 dist = copy->subs[eq].coef[0];
3597 dist_v[copy->subs[eq].key - 1] = dist;
3598 }
3599
3600 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3601 dir_v[j] = dir_from_dist (dist_v[j]);
3602
3603 save_dist_v (ddr, dist_v);
3604 save_dir_v (ddr, dir_v);
3605 }
3606
3607 next_problem:;
3608 omega_free_problem (copy);
3609 }
3610 }
3611
3612 /* This is called for each subscript of a tuple of data references:
3613 insert an equality for representing the conflicts. */
3614
3615 static bool
3616 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3617 struct data_dependence_relation *ddr,
3618 omega_pb pb, bool *maybe_dependent)
3619 {
3620 int eq;
3621 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3622 TREE_TYPE (access_fun_b));
3623 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3624 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3625 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3626 tree minus_one;
3627
3628 /* When the fun_a - fun_b is not constant, the dependence is not
3629 captured by the classic distance vector representation. */
3630 if (TREE_CODE (difference) != INTEGER_CST)
3631 return false;
3632
3633 /* ZIV test. */
3634 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3635 {
3636 /* There is no dependence. */
3637 *maybe_dependent = false;
3638 return true;
3639 }
3640
3641 minus_one = build_int_cst (type, -1);
3642 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3643
3644 eq = omega_add_zero_eq (pb, omega_black);
3645 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3646 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3647 /* There is probably a dependence, but the system of
3648 constraints cannot be built: answer "don't know". */
3649 return false;
3650
3651 /* GCD test. */
3652 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3653 && !int_divides_p (lambda_vector_gcd
3654 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3655 2 * DDR_NB_LOOPS (ddr)),
3656 pb->eqs[eq].coef[0]))
3657 {
3658 /* There is no dependence. */
3659 *maybe_dependent = false;
3660 return true;
3661 }
3662
3663 return true;
3664 }
3665
3666 /* Helper function, same as init_omega_for_ddr but specialized for
3667 data references A and B. */
3668
3669 static bool
3670 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3671 struct data_dependence_relation *ddr,
3672 omega_pb pb, bool *maybe_dependent)
3673 {
3674 unsigned i;
3675 int ineq;
3676 struct loop *loopi;
3677 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3678
3679 /* Insert an equality per subscript. */
3680 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3681 {
3682 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3683 ddr, pb, maybe_dependent))
3684 return false;
3685 else if (*maybe_dependent == false)
3686 {
3687 /* There is no dependence. */
3688 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3689 return true;
3690 }
3691 }
3692
3693 /* Insert inequalities: constraints corresponding to the iteration
3694 domain, i.e. the loops surrounding the references "loop_x" and
3695 the distance variables "dx". The layout of the OMEGA
3696 representation is as follows:
3697 - coef[0] is the constant
3698 - coef[1..nb_loops] are the protected variables that will not be
3699 removed by the solver: the "dx"
3700 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3701 */
3702 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3703 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3704 {
3705 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3706
3707 /* 0 <= loop_x */
3708 ineq = omega_add_zero_geq (pb, omega_black);
3709 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3710
3711 /* 0 <= loop_x + dx */
3712 ineq = omega_add_zero_geq (pb, omega_black);
3713 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3714 pb->geqs[ineq].coef[i + 1] = 1;
3715
3716 if (nbi != -1)
3717 {
3718 /* loop_x <= nb_iters */
3719 ineq = omega_add_zero_geq (pb, omega_black);
3720 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3721 pb->geqs[ineq].coef[0] = nbi;
3722
3723 /* loop_x + dx <= nb_iters */
3724 ineq = omega_add_zero_geq (pb, omega_black);
3725 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3726 pb->geqs[ineq].coef[i + 1] = -1;
3727 pb->geqs[ineq].coef[0] = nbi;
3728
3729 /* A step "dx" bigger than nb_iters is not feasible, so
3730 add "0 <= nb_iters + dx", */
3731 ineq = omega_add_zero_geq (pb, omega_black);
3732 pb->geqs[ineq].coef[i + 1] = 1;
3733 pb->geqs[ineq].coef[0] = nbi;
3734 /* and "dx <= nb_iters". */
3735 ineq = omega_add_zero_geq (pb, omega_black);
3736 pb->geqs[ineq].coef[i + 1] = -1;
3737 pb->geqs[ineq].coef[0] = nbi;
3738 }
3739 }
3740
3741 omega_extract_distance_vectors (pb, ddr);
3742
3743 return true;
3744 }
3745
3746 /* Sets up the Omega dependence problem for the data dependence
3747 relation DDR. Returns false when the constraint system cannot be
3748 built, ie. when the test answers "don't know". Returns true
3749 otherwise, and when independence has been proved (using one of the
3750 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3751 set MAYBE_DEPENDENT to true.
3752
3753 Example: for setting up the dependence system corresponding to the
3754 conflicting accesses
3755
3756 | loop_i
3757 | loop_j
3758 | A[i, i+1] = ...
3759 | ... A[2*j, 2*(i + j)]
3760 | endloop_j
3761 | endloop_i
3762
3763 the following constraints come from the iteration domain:
3764
3765 0 <= i <= Ni
3766 0 <= i + di <= Ni
3767 0 <= j <= Nj
3768 0 <= j + dj <= Nj
3769
3770 where di, dj are the distance variables. The constraints
3771 representing the conflicting elements are:
3772
3773 i = 2 * (j + dj)
3774 i + 1 = 2 * (i + di + j + dj)
3775
3776 For asking that the resulting distance vector (di, dj) be
3777 lexicographically positive, we insert the constraint "di >= 0". If
3778 "di = 0" in the solution, we fix that component to zero, and we
3779 look at the inner loops: we set a new problem where all the outer
3780 loop distances are zero, and fix this inner component to be
3781 positive. When one of the components is positive, we save that
3782 distance, and set a new problem where the distance on this loop is
3783 zero, searching for other distances in the inner loops. Here is
3784 the classic example that illustrates that we have to set for each
3785 inner loop a new problem:
3786
3787 | loop_1
3788 | loop_2
3789 | A[10]
3790 | endloop_2
3791 | endloop_1
3792
3793 we have to save two distances (1, 0) and (0, 1).
3794
3795 Given two array references, refA and refB, we have to set the
3796 dependence problem twice, refA vs. refB and refB vs. refA, and we
3797 cannot do a single test, as refB might occur before refA in the
3798 inner loops, and the contrary when considering outer loops: ex.
3799
3800 | loop_0
3801 | loop_1
3802 | loop_2
3803 | T[{1,+,1}_2][{1,+,1}_1] // refA
3804 | T[{2,+,1}_2][{0,+,1}_1] // refB
3805 | endloop_2
3806 | endloop_1
3807 | endloop_0
3808
3809 refB touches the elements in T before refA, and thus for the same
3810 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3811 but for successive loop_0 iterations, we have (1, -1, 1)
3812
3813 The Omega solver expects the distance variables ("di" in the
3814 previous example) to come first in the constraint system (as
3815 variables to be protected, or "safe" variables), the constraint
3816 system is built using the following layout:
3817
3818 "cst | distance vars | index vars".
3819 */
3820
3821 static bool
3822 init_omega_for_ddr (struct data_dependence_relation *ddr,
3823 bool *maybe_dependent)
3824 {
3825 omega_pb pb;
3826 bool res = false;
3827
3828 *maybe_dependent = true;
3829
3830 if (same_access_functions (ddr))
3831 {
3832 unsigned j;
3833 lambda_vector dir_v;
3834
3835 /* Save the 0 vector. */
3836 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3837 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3838 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3839 dir_v[j] = dir_equal;
3840 save_dir_v (ddr, dir_v);
3841
3842 /* Save the dependences carried by outer loops. */
3843 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3844 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3845 maybe_dependent);
3846 omega_free_problem (pb);
3847 return res;
3848 }
3849
3850 /* Omega expects the protected variables (those that have to be kept
3851 after elimination) to appear first in the constraint system.
3852 These variables are the distance variables. In the following
3853 initialization we declare NB_LOOPS safe variables, and the total
3854 number of variables for the constraint system is 2*NB_LOOPS. */
3855 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3856 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3857 maybe_dependent);
3858 omega_free_problem (pb);
3859
3860 /* Stop computation if not decidable, or no dependence. */
3861 if (res == false || *maybe_dependent == false)
3862 return res;
3863
3864 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3865 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3866 maybe_dependent);
3867 omega_free_problem (pb);
3868
3869 return res;
3870 }
3871
3872 /* Return true when DDR contains the same information as that stored
3873 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3874
3875 static bool
3876 ddr_consistent_p (FILE *file,
3877 struct data_dependence_relation *ddr,
3878 VEC (lambda_vector, heap) *dist_vects,
3879 VEC (lambda_vector, heap) *dir_vects)
3880 {
3881 unsigned int i, j;
3882
3883 /* If dump_file is set, output there. */
3884 if (dump_file && (dump_flags & TDF_DETAILS))
3885 file = dump_file;
3886
3887 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3888 {
3889 lambda_vector b_dist_v;
3890 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3891 VEC_length (lambda_vector, dist_vects),
3892 DDR_NUM_DIST_VECTS (ddr));
3893
3894 fprintf (file, "Banerjee dist vectors:\n");
3895 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3896 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3897
3898 fprintf (file, "Omega dist vectors:\n");
3899 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3900 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3901
3902 fprintf (file, "data dependence relation:\n");
3903 dump_data_dependence_relation (file, ddr);
3904
3905 fprintf (file, ")\n");
3906 return false;
3907 }
3908
3909 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3910 {
3911 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3912 VEC_length (lambda_vector, dir_vects),
3913 DDR_NUM_DIR_VECTS (ddr));
3914 return false;
3915 }
3916
3917 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3918 {
3919 lambda_vector a_dist_v;
3920 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3921
3922 /* Distance vectors are not ordered in the same way in the DDR
3923 and in the DIST_VECTS: search for a matching vector. */
3924 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3925 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3926 break;
3927
3928 if (j == VEC_length (lambda_vector, dist_vects))
3929 {
3930 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3931 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3932 fprintf (file, "not found in Omega dist vectors:\n");
3933 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3934 fprintf (file, "data dependence relation:\n");
3935 dump_data_dependence_relation (file, ddr);
3936 fprintf (file, ")\n");
3937 }
3938 }
3939
3940 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3941 {
3942 lambda_vector a_dir_v;
3943 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3944
3945 /* Direction vectors are not ordered in the same way in the DDR
3946 and in the DIR_VECTS: search for a matching vector. */
3947 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3948 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3949 break;
3950
3951 if (j == VEC_length (lambda_vector, dist_vects))
3952 {
3953 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3954 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3955 fprintf (file, "not found in Omega dir vectors:\n");
3956 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3957 fprintf (file, "data dependence relation:\n");
3958 dump_data_dependence_relation (file, ddr);
3959 fprintf (file, ")\n");
3960 }
3961 }
3962
3963 return true;
3964 }
3965
3966 /* This computes the affine dependence relation between A and B with
3967 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3968 independence between two accesses, while CHREC_DONT_KNOW is used
3969 for representing the unknown relation.
3970
3971 Note that it is possible to stop the computation of the dependence
3972 relation the first time we detect a CHREC_KNOWN element for a given
3973 subscript. */
3974
3975 static void
3976 compute_affine_dependence (struct data_dependence_relation *ddr,
3977 struct loop *loop_nest)
3978 {
3979 struct data_reference *dra = DDR_A (ddr);
3980 struct data_reference *drb = DDR_B (ddr);
3981
3982 if (dump_file && (dump_flags & TDF_DETAILS))
3983 {
3984 fprintf (dump_file, "(compute_affine_dependence\n");
3985 fprintf (dump_file, " (stmt_a = \n");
3986 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3987 fprintf (dump_file, ")\n (stmt_b = \n");
3988 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3989 fprintf (dump_file, ")\n");
3990 }
3991
3992 /* Analyze only when the dependence relation is not yet known. */
3993 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3994 && !DDR_SELF_REFERENCE (ddr))
3995 {
3996 dependence_stats.num_dependence_tests++;
3997
3998 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3999 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4000 {
4001 if (flag_check_data_deps)
4002 {
4003 /* Compute the dependences using the first algorithm. */
4004 subscript_dependence_tester (ddr, loop_nest);
4005
4006 if (dump_file && (dump_flags & TDF_DETAILS))
4007 {
4008 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4009 dump_data_dependence_relation (dump_file, ddr);
4010 }
4011
4012 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4013 {
4014 bool maybe_dependent;
4015 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4016
4017 /* Save the result of the first DD analyzer. */
4018 dist_vects = DDR_DIST_VECTS (ddr);
4019 dir_vects = DDR_DIR_VECTS (ddr);
4020
4021 /* Reset the information. */
4022 DDR_DIST_VECTS (ddr) = NULL;
4023 DDR_DIR_VECTS (ddr) = NULL;
4024
4025 /* Compute the same information using Omega. */
4026 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4027 goto csys_dont_know;
4028
4029 if (dump_file && (dump_flags & TDF_DETAILS))
4030 {
4031 fprintf (dump_file, "Omega Analyzer\n");
4032 dump_data_dependence_relation (dump_file, ddr);
4033 }
4034
4035 /* Check that we get the same information. */
4036 if (maybe_dependent)
4037 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4038 dir_vects));
4039 }
4040 }
4041 else
4042 subscript_dependence_tester (ddr, loop_nest);
4043 }
4044
4045 /* As a last case, if the dependence cannot be determined, or if
4046 the dependence is considered too difficult to determine, answer
4047 "don't know". */
4048 else
4049 {
4050 csys_dont_know:;
4051 dependence_stats.num_dependence_undetermined++;
4052
4053 if (dump_file && (dump_flags & TDF_DETAILS))
4054 {
4055 fprintf (dump_file, "Data ref a:\n");
4056 dump_data_reference (dump_file, dra);
4057 fprintf (dump_file, "Data ref b:\n");
4058 dump_data_reference (dump_file, drb);
4059 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4060 }
4061 finalize_ddr_dependent (ddr, chrec_dont_know);
4062 }
4063 }
4064
4065 if (dump_file && (dump_flags & TDF_DETAILS))
4066 fprintf (dump_file, ")\n");
4067 }
4068
4069 /* This computes the dependence relation for the same data
4070 reference into DDR. */
4071
4072 static void
4073 compute_self_dependence (struct data_dependence_relation *ddr)
4074 {
4075 unsigned int i;
4076 struct subscript *subscript;
4077
4078 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4079 return;
4080
4081 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4082 i++)
4083 {
4084 if (SUB_CONFLICTS_IN_A (subscript))
4085 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4086 if (SUB_CONFLICTS_IN_B (subscript))
4087 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4088
4089 /* The accessed index overlaps for each iteration. */
4090 SUB_CONFLICTS_IN_A (subscript)
4091 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4092 SUB_CONFLICTS_IN_B (subscript)
4093 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4094 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4095 }
4096
4097 /* The distance vector is the zero vector. */
4098 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4099 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4100 }
4101
4102 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4103 the data references in DATAREFS, in the LOOP_NEST. When
4104 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4105 relations. */
4106
4107 void
4108 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4109 VEC (ddr_p, heap) **dependence_relations,
4110 VEC (loop_p, heap) *loop_nest,
4111 bool compute_self_and_rr)
4112 {
4113 struct data_dependence_relation *ddr;
4114 struct data_reference *a, *b;
4115 unsigned int i, j;
4116
4117 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4118 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4119 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4120 {
4121 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4122 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4123 if (loop_nest)
4124 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4125 }
4126
4127 if (compute_self_and_rr)
4128 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4129 {
4130 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4131 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4132 compute_self_dependence (ddr);
4133 }
4134 }
4135
4136 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4137 true if STMT clobbers memory, false otherwise. */
4138
4139 bool
4140 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4141 {
4142 bool clobbers_memory = false;
4143 data_ref_loc *ref;
4144 tree *op0, *op1;
4145 enum gimple_code stmt_code = gimple_code (stmt);
4146
4147 *references = NULL;
4148
4149 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4150 Calls have side-effects, except those to const or pure
4151 functions. */
4152 if ((stmt_code == GIMPLE_CALL
4153 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4154 || (stmt_code == GIMPLE_ASM
4155 && gimple_asm_volatile_p (stmt)))
4156 clobbers_memory = true;
4157
4158 if (!gimple_vuse (stmt))
4159 return clobbers_memory;
4160
4161 if (stmt_code == GIMPLE_ASSIGN)
4162 {
4163 tree base;
4164 op0 = gimple_assign_lhs_ptr (stmt);
4165 op1 = gimple_assign_rhs1_ptr (stmt);
4166
4167 if (DECL_P (*op1)
4168 || (REFERENCE_CLASS_P (*op1)
4169 && (base = get_base_address (*op1))
4170 && TREE_CODE (base) != SSA_NAME))
4171 {
4172 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4173 ref->pos = op1;
4174 ref->is_read = true;
4175 }
4176
4177 if (DECL_P (*op0)
4178 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4179 {
4180 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4181 ref->pos = op0;
4182 ref->is_read = false;
4183 }
4184 }
4185 else if (stmt_code == GIMPLE_CALL)
4186 {
4187 unsigned i, n = gimple_call_num_args (stmt);
4188
4189 for (i = 0; i < n; i++)
4190 {
4191 op0 = gimple_call_arg_ptr (stmt, i);
4192
4193 if (DECL_P (*op0)
4194 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4195 {
4196 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4197 ref->pos = op0;
4198 ref->is_read = true;
4199 }
4200 }
4201 }
4202
4203 return clobbers_memory;
4204 }
4205
4206 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4207 reference, returns false, otherwise returns true. NEST is the outermost
4208 loop of the loop nest in which the references should be analyzed. */
4209
4210 bool
4211 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4212 VEC (data_reference_p, heap) **datarefs)
4213 {
4214 unsigned i;
4215 VEC (data_ref_loc, heap) *references;
4216 data_ref_loc *ref;
4217 bool ret = true;
4218 data_reference_p dr;
4219
4220 if (get_references_in_stmt (stmt, &references))
4221 {
4222 VEC_free (data_ref_loc, heap, references);
4223 return false;
4224 }
4225
4226 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4227 {
4228 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4229 *ref->pos, stmt, ref->is_read);
4230 gcc_assert (dr != NULL);
4231
4232 /* FIXME -- data dependence analysis does not work correctly for objects
4233 with invariant addresses in loop nests. Let us fail here until the
4234 problem is fixed. */
4235 if (dr_address_invariant_p (dr) && nest)
4236 {
4237 free_data_ref (dr);
4238 if (dump_file && (dump_flags & TDF_DETAILS))
4239 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4240 ret = false;
4241 break;
4242 }
4243
4244 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4245 }
4246 VEC_free (data_ref_loc, heap, references);
4247 return ret;
4248 }
4249
4250 /* Stores the data references in STMT to DATAREFS. If there is an
4251 unanalyzable reference, returns false, otherwise returns true.
4252 NEST is the outermost loop of the loop nest in which the references
4253 should be instantiated, LOOP is the loop in which the references
4254 should be analyzed. */
4255
4256 bool
4257 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4258 VEC (data_reference_p, heap) **datarefs)
4259 {
4260 unsigned i;
4261 VEC (data_ref_loc, heap) *references;
4262 data_ref_loc *ref;
4263 bool ret = true;
4264 data_reference_p dr;
4265
4266 if (get_references_in_stmt (stmt, &references))
4267 {
4268 VEC_free (data_ref_loc, heap, references);
4269 return false;
4270 }
4271
4272 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4273 {
4274 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4275 gcc_assert (dr != NULL);
4276 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4277 }
4278
4279 VEC_free (data_ref_loc, heap, references);
4280 return ret;
4281 }
4282
4283 /* Search the data references in LOOP, and record the information into
4284 DATAREFS. Returns chrec_dont_know when failing to analyze a
4285 difficult case, returns NULL_TREE otherwise. */
4286
4287 tree
4288 find_data_references_in_bb (struct loop *loop, basic_block bb,
4289 VEC (data_reference_p, heap) **datarefs)
4290 {
4291 gimple_stmt_iterator bsi;
4292
4293 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4294 {
4295 gimple stmt = gsi_stmt (bsi);
4296
4297 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4298 {
4299 struct data_reference *res;
4300 res = XCNEW (struct data_reference);
4301 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4302
4303 return chrec_dont_know;
4304 }
4305 }
4306
4307 return NULL_TREE;
4308 }
4309
4310 /* Search the data references in LOOP, and record the information into
4311 DATAREFS. Returns chrec_dont_know when failing to analyze a
4312 difficult case, returns NULL_TREE otherwise.
4313
4314 TODO: This function should be made smarter so that it can handle address
4315 arithmetic as if they were array accesses, etc. */
4316
4317 tree
4318 find_data_references_in_loop (struct loop *loop,
4319 VEC (data_reference_p, heap) **datarefs)
4320 {
4321 basic_block bb, *bbs;
4322 unsigned int i;
4323
4324 bbs = get_loop_body_in_dom_order (loop);
4325
4326 for (i = 0; i < loop->num_nodes; i++)
4327 {
4328 bb = bbs[i];
4329
4330 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4331 {
4332 free (bbs);
4333 return chrec_dont_know;
4334 }
4335 }
4336 free (bbs);
4337
4338 return NULL_TREE;
4339 }
4340
4341 /* Recursive helper function. */
4342
4343 static bool
4344 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4345 {
4346 /* Inner loops of the nest should not contain siblings. Example:
4347 when there are two consecutive loops,
4348
4349 | loop_0
4350 | loop_1
4351 | A[{0, +, 1}_1]
4352 | endloop_1
4353 | loop_2
4354 | A[{0, +, 1}_2]
4355 | endloop_2
4356 | endloop_0
4357
4358 the dependence relation cannot be captured by the distance
4359 abstraction. */
4360 if (loop->next)
4361 return false;
4362
4363 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4364 if (loop->inner)
4365 return find_loop_nest_1 (loop->inner, loop_nest);
4366 return true;
4367 }
4368
4369 /* Return false when the LOOP is not well nested. Otherwise return
4370 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4371 contain the loops from the outermost to the innermost, as they will
4372 appear in the classic distance vector. */
4373
4374 bool
4375 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4376 {
4377 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4378 if (loop->inner)
4379 return find_loop_nest_1 (loop->inner, loop_nest);
4380 return true;
4381 }
4382
4383 /* Returns true when the data dependences have been computed, false otherwise.
4384 Given a loop nest LOOP, the following vectors are returned:
4385 DATAREFS is initialized to all the array elements contained in this loop,
4386 DEPENDENCE_RELATIONS contains the relations between the data references.
4387 Compute read-read and self relations if
4388 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4389
4390 bool
4391 compute_data_dependences_for_loop (struct loop *loop,
4392 bool compute_self_and_read_read_dependences,
4393 VEC (loop_p, heap) **loop_nest,
4394 VEC (data_reference_p, heap) **datarefs,
4395 VEC (ddr_p, heap) **dependence_relations)
4396 {
4397 bool res = true;
4398
4399 memset (&dependence_stats, 0, sizeof (dependence_stats));
4400
4401 /* If the loop nest is not well formed, or one of the data references
4402 is not computable, give up without spending time to compute other
4403 dependences. */
4404 if (!loop
4405 || !find_loop_nest (loop, loop_nest)
4406 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4407 {
4408 struct data_dependence_relation *ddr;
4409
4410 /* Insert a single relation into dependence_relations:
4411 chrec_dont_know. */
4412 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4413 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4414 res = false;
4415 }
4416 else
4417 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4418 compute_self_and_read_read_dependences);
4419
4420 if (dump_file && (dump_flags & TDF_STATS))
4421 {
4422 fprintf (dump_file, "Dependence tester statistics:\n");
4423
4424 fprintf (dump_file, "Number of dependence tests: %d\n",
4425 dependence_stats.num_dependence_tests);
4426 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4427 dependence_stats.num_dependence_dependent);
4428 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4429 dependence_stats.num_dependence_independent);
4430 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4431 dependence_stats.num_dependence_undetermined);
4432
4433 fprintf (dump_file, "Number of subscript tests: %d\n",
4434 dependence_stats.num_subscript_tests);
4435 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4436 dependence_stats.num_subscript_undetermined);
4437 fprintf (dump_file, "Number of same subscript function: %d\n",
4438 dependence_stats.num_same_subscript_function);
4439
4440 fprintf (dump_file, "Number of ziv tests: %d\n",
4441 dependence_stats.num_ziv);
4442 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4443 dependence_stats.num_ziv_dependent);
4444 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4445 dependence_stats.num_ziv_independent);
4446 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4447 dependence_stats.num_ziv_unimplemented);
4448
4449 fprintf (dump_file, "Number of siv tests: %d\n",
4450 dependence_stats.num_siv);
4451 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4452 dependence_stats.num_siv_dependent);
4453 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4454 dependence_stats.num_siv_independent);
4455 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4456 dependence_stats.num_siv_unimplemented);
4457
4458 fprintf (dump_file, "Number of miv tests: %d\n",
4459 dependence_stats.num_miv);
4460 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4461 dependence_stats.num_miv_dependent);
4462 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4463 dependence_stats.num_miv_independent);
4464 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4465 dependence_stats.num_miv_unimplemented);
4466 }
4467
4468 return res;
4469 }
4470
4471 /* Returns true when the data dependences for the basic block BB have been
4472 computed, false otherwise.
4473 DATAREFS is initialized to all the array elements contained in this basic
4474 block, DEPENDENCE_RELATIONS contains the relations between the data
4475 references. Compute read-read and self relations if
4476 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4477 bool
4478 compute_data_dependences_for_bb (basic_block bb,
4479 bool compute_self_and_read_read_dependences,
4480 VEC (data_reference_p, heap) **datarefs,
4481 VEC (ddr_p, heap) **dependence_relations)
4482 {
4483 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4484 return false;
4485
4486 compute_all_dependences (*datarefs, dependence_relations, NULL,
4487 compute_self_and_read_read_dependences);
4488 return true;
4489 }
4490
4491 /* Entry point (for testing only). Analyze all the data references
4492 and the dependence relations in LOOP.
4493
4494 The data references are computed first.
4495
4496 A relation on these nodes is represented by a complete graph. Some
4497 of the relations could be of no interest, thus the relations can be
4498 computed on demand.
4499
4500 In the following function we compute all the relations. This is
4501 just a first implementation that is here for:
4502 - for showing how to ask for the dependence relations,
4503 - for the debugging the whole dependence graph,
4504 - for the dejagnu testcases and maintenance.
4505
4506 It is possible to ask only for a part of the graph, avoiding to
4507 compute the whole dependence graph. The computed dependences are
4508 stored in a knowledge base (KB) such that later queries don't
4509 recompute the same information. The implementation of this KB is
4510 transparent to the optimizer, and thus the KB can be changed with a
4511 more efficient implementation, or the KB could be disabled. */
4512 static void
4513 analyze_all_data_dependences (struct loop *loop)
4514 {
4515 unsigned int i;
4516 int nb_data_refs = 10;
4517 VEC (data_reference_p, heap) *datarefs =
4518 VEC_alloc (data_reference_p, heap, nb_data_refs);
4519 VEC (ddr_p, heap) *dependence_relations =
4520 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4521 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4522
4523 /* Compute DDs on the whole function. */
4524 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4525 &dependence_relations);
4526
4527 if (dump_file)
4528 {
4529 dump_data_dependence_relations (dump_file, dependence_relations);
4530 fprintf (dump_file, "\n\n");
4531
4532 if (dump_flags & TDF_DETAILS)
4533 dump_dist_dir_vectors (dump_file, dependence_relations);
4534
4535 if (dump_flags & TDF_STATS)
4536 {
4537 unsigned nb_top_relations = 0;
4538 unsigned nb_bot_relations = 0;
4539 unsigned nb_chrec_relations = 0;
4540 struct data_dependence_relation *ddr;
4541
4542 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4543 {
4544 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4545 nb_top_relations++;
4546
4547 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4548 nb_bot_relations++;
4549
4550 else
4551 nb_chrec_relations++;
4552 }
4553
4554 gather_stats_on_scev_database ();
4555 }
4556 }
4557
4558 VEC_free (loop_p, heap, loop_nest);
4559 free_dependence_relations (dependence_relations);
4560 free_data_refs (datarefs);
4561 }
4562
4563 /* Computes all the data dependences and check that the results of
4564 several analyzers are the same. */
4565
4566 void
4567 tree_check_data_deps (void)
4568 {
4569 loop_iterator li;
4570 struct loop *loop_nest;
4571
4572 FOR_EACH_LOOP (li, loop_nest, 0)
4573 analyze_all_data_dependences (loop_nest);
4574 }
4575
4576 /* Free the memory used by a data dependence relation DDR. */
4577
4578 void
4579 free_dependence_relation (struct data_dependence_relation *ddr)
4580 {
4581 if (ddr == NULL)
4582 return;
4583
4584 if (DDR_SUBSCRIPTS (ddr))
4585 free_subscripts (DDR_SUBSCRIPTS (ddr));
4586 if (DDR_DIST_VECTS (ddr))
4587 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4588 if (DDR_DIR_VECTS (ddr))
4589 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4590
4591 free (ddr);
4592 }
4593
4594 /* Free the memory used by the data dependence relations from
4595 DEPENDENCE_RELATIONS. */
4596
4597 void
4598 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4599 {
4600 unsigned int i;
4601 struct data_dependence_relation *ddr;
4602
4603 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4604 if (ddr)
4605 free_dependence_relation (ddr);
4606
4607 VEC_free (ddr_p, heap, dependence_relations);
4608 }
4609
4610 /* Free the memory used by the data references from DATAREFS. */
4611
4612 void
4613 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4614 {
4615 unsigned int i;
4616 struct data_reference *dr;
4617
4618 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4619 free_data_ref (dr);
4620 VEC_free (data_reference_p, heap, datarefs);
4621 }
4622
4623 \f
4624
4625 /* Dump vertex I in RDG to FILE. */
4626
4627 void
4628 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4629 {
4630 struct vertex *v = &(rdg->vertices[i]);
4631 struct graph_edge *e;
4632
4633 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4634 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4635 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4636
4637 if (v->pred)
4638 for (e = v->pred; e; e = e->pred_next)
4639 fprintf (file, " %d", e->src);
4640
4641 fprintf (file, ") (out:");
4642
4643 if (v->succ)
4644 for (e = v->succ; e; e = e->succ_next)
4645 fprintf (file, " %d", e->dest);
4646
4647 fprintf (file, ")\n");
4648 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4649 fprintf (file, ")\n");
4650 }
4651
4652 /* Call dump_rdg_vertex on stderr. */
4653
4654 DEBUG_FUNCTION void
4655 debug_rdg_vertex (struct graph *rdg, int i)
4656 {
4657 dump_rdg_vertex (stderr, rdg, i);
4658 }
4659
4660 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4661 dumped vertices to that bitmap. */
4662
4663 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4664 {
4665 int i;
4666
4667 fprintf (file, "(%d\n", c);
4668
4669 for (i = 0; i < rdg->n_vertices; i++)
4670 if (rdg->vertices[i].component == c)
4671 {
4672 if (dumped)
4673 bitmap_set_bit (dumped, i);
4674
4675 dump_rdg_vertex (file, rdg, i);
4676 }
4677
4678 fprintf (file, ")\n");
4679 }
4680
4681 /* Call dump_rdg_vertex on stderr. */
4682
4683 DEBUG_FUNCTION void
4684 debug_rdg_component (struct graph *rdg, int c)
4685 {
4686 dump_rdg_component (stderr, rdg, c, NULL);
4687 }
4688
4689 /* Dump the reduced dependence graph RDG to FILE. */
4690
4691 void
4692 dump_rdg (FILE *file, struct graph *rdg)
4693 {
4694 int i;
4695 bitmap dumped = BITMAP_ALLOC (NULL);
4696
4697 fprintf (file, "(rdg\n");
4698
4699 for (i = 0; i < rdg->n_vertices; i++)
4700 if (!bitmap_bit_p (dumped, i))
4701 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4702
4703 fprintf (file, ")\n");
4704 BITMAP_FREE (dumped);
4705 }
4706
4707 /* Call dump_rdg on stderr. */
4708
4709 DEBUG_FUNCTION void
4710 debug_rdg (struct graph *rdg)
4711 {
4712 dump_rdg (stderr, rdg);
4713 }
4714
4715 static void
4716 dot_rdg_1 (FILE *file, struct graph *rdg)
4717 {
4718 int i;
4719
4720 fprintf (file, "digraph RDG {\n");
4721
4722 for (i = 0; i < rdg->n_vertices; i++)
4723 {
4724 struct vertex *v = &(rdg->vertices[i]);
4725 struct graph_edge *e;
4726
4727 /* Highlight reads from memory. */
4728 if (RDG_MEM_READS_STMT (rdg, i))
4729 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4730
4731 /* Highlight stores to memory. */
4732 if (RDG_MEM_WRITE_STMT (rdg, i))
4733 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4734
4735 if (v->succ)
4736 for (e = v->succ; e; e = e->succ_next)
4737 switch (RDGE_TYPE (e))
4738 {
4739 case input_dd:
4740 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4741 break;
4742
4743 case output_dd:
4744 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4745 break;
4746
4747 case flow_dd:
4748 /* These are the most common dependences: don't print these. */
4749 fprintf (file, "%d -> %d \n", i, e->dest);
4750 break;
4751
4752 case anti_dd:
4753 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4754 break;
4755
4756 default:
4757 gcc_unreachable ();
4758 }
4759 }
4760
4761 fprintf (file, "}\n\n");
4762 }
4763
4764 /* Display the Reduced Dependence Graph using dotty. */
4765 extern void dot_rdg (struct graph *);
4766
4767 DEBUG_FUNCTION void
4768 dot_rdg (struct graph *rdg)
4769 {
4770 /* When debugging, enable the following code. This cannot be used
4771 in production compilers because it calls "system". */
4772 #if 0
4773 FILE *file = fopen ("/tmp/rdg.dot", "w");
4774 gcc_assert (file != NULL);
4775
4776 dot_rdg_1 (file, rdg);
4777 fclose (file);
4778
4779 system ("dotty /tmp/rdg.dot &");
4780 #else
4781 dot_rdg_1 (stderr, rdg);
4782 #endif
4783 }
4784
4785 /* This structure is used for recording the mapping statement index in
4786 the RDG. */
4787
4788 struct GTY(()) rdg_vertex_info
4789 {
4790 gimple stmt;
4791 int index;
4792 };
4793
4794 /* Returns the index of STMT in RDG. */
4795
4796 int
4797 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4798 {
4799 struct rdg_vertex_info rvi, *slot;
4800
4801 rvi.stmt = stmt;
4802 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4803
4804 if (!slot)
4805 return -1;
4806
4807 return slot->index;
4808 }
4809
4810 /* Creates an edge in RDG for each distance vector from DDR. The
4811 order that we keep track of in the RDG is the order in which
4812 statements have to be executed. */
4813
4814 static void
4815 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4816 {
4817 struct graph_edge *e;
4818 int va, vb;
4819 data_reference_p dra = DDR_A (ddr);
4820 data_reference_p drb = DDR_B (ddr);
4821 unsigned level = ddr_dependence_level (ddr);
4822
4823 /* For non scalar dependences, when the dependence is REVERSED,
4824 statement B has to be executed before statement A. */
4825 if (level > 0
4826 && !DDR_REVERSED_P (ddr))
4827 {
4828 data_reference_p tmp = dra;
4829 dra = drb;
4830 drb = tmp;
4831 }
4832
4833 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4834 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4835
4836 if (va < 0 || vb < 0)
4837 return;
4838
4839 e = add_edge (rdg, va, vb);
4840 e->data = XNEW (struct rdg_edge);
4841
4842 RDGE_LEVEL (e) = level;
4843 RDGE_RELATION (e) = ddr;
4844
4845 /* Determines the type of the data dependence. */
4846 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4847 RDGE_TYPE (e) = input_dd;
4848 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4849 RDGE_TYPE (e) = output_dd;
4850 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4851 RDGE_TYPE (e) = flow_dd;
4852 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4853 RDGE_TYPE (e) = anti_dd;
4854 }
4855
4856 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4857 the index of DEF in RDG. */
4858
4859 static void
4860 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4861 {
4862 use_operand_p imm_use_p;
4863 imm_use_iterator iterator;
4864
4865 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4866 {
4867 struct graph_edge *e;
4868 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4869
4870 if (use < 0)
4871 continue;
4872
4873 e = add_edge (rdg, idef, use);
4874 e->data = XNEW (struct rdg_edge);
4875 RDGE_TYPE (e) = flow_dd;
4876 RDGE_RELATION (e) = NULL;
4877 }
4878 }
4879
4880 /* Creates the edges of the reduced dependence graph RDG. */
4881
4882 static void
4883 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4884 {
4885 int i;
4886 struct data_dependence_relation *ddr;
4887 def_operand_p def_p;
4888 ssa_op_iter iter;
4889
4890 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4891 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4892 create_rdg_edge_for_ddr (rdg, ddr);
4893
4894 for (i = 0; i < rdg->n_vertices; i++)
4895 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4896 iter, SSA_OP_DEF)
4897 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4898 }
4899
4900 /* Build the vertices of the reduced dependence graph RDG. */
4901
4902 void
4903 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4904 {
4905 int i, j;
4906 gimple stmt;
4907
4908 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4909 {
4910 VEC (data_ref_loc, heap) *references;
4911 data_ref_loc *ref;
4912 struct vertex *v = &(rdg->vertices[i]);
4913 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4914 struct rdg_vertex_info **slot;
4915
4916 rvi->stmt = stmt;
4917 rvi->index = i;
4918 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4919
4920 if (!*slot)
4921 *slot = rvi;
4922 else
4923 free (rvi);
4924
4925 v->data = XNEW (struct rdg_vertex);
4926 RDG_STMT (rdg, i) = stmt;
4927
4928 RDG_MEM_WRITE_STMT (rdg, i) = false;
4929 RDG_MEM_READS_STMT (rdg, i) = false;
4930 if (gimple_code (stmt) == GIMPLE_PHI)
4931 continue;
4932
4933 get_references_in_stmt (stmt, &references);
4934 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4935 if (!ref->is_read)
4936 RDG_MEM_WRITE_STMT (rdg, i) = true;
4937 else
4938 RDG_MEM_READS_STMT (rdg, i) = true;
4939
4940 VEC_free (data_ref_loc, heap, references);
4941 }
4942 }
4943
4944 /* Initialize STMTS with all the statements of LOOP. When
4945 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4946 which we discover statements is important as
4947 generate_loops_for_partition is using the same traversal for
4948 identifying statements. */
4949
4950 static void
4951 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4952 {
4953 unsigned int i;
4954 basic_block *bbs = get_loop_body_in_dom_order (loop);
4955
4956 for (i = 0; i < loop->num_nodes; i++)
4957 {
4958 basic_block bb = bbs[i];
4959 gimple_stmt_iterator bsi;
4960 gimple stmt;
4961
4962 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4963 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4964
4965 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4966 {
4967 stmt = gsi_stmt (bsi);
4968 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
4969 VEC_safe_push (gimple, heap, *stmts, stmt);
4970 }
4971 }
4972
4973 free (bbs);
4974 }
4975
4976 /* Returns true when all the dependences are computable. */
4977
4978 static bool
4979 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4980 {
4981 ddr_p ddr;
4982 unsigned int i;
4983
4984 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4985 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4986 return false;
4987
4988 return true;
4989 }
4990
4991 /* Computes a hash function for element ELT. */
4992
4993 static hashval_t
4994 hash_stmt_vertex_info (const void *elt)
4995 {
4996 const struct rdg_vertex_info *const rvi =
4997 (const struct rdg_vertex_info *) elt;
4998 gimple stmt = rvi->stmt;
4999
5000 return htab_hash_pointer (stmt);
5001 }
5002
5003 /* Compares database elements E1 and E2. */
5004
5005 static int
5006 eq_stmt_vertex_info (const void *e1, const void *e2)
5007 {
5008 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5009 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5010
5011 return elt1->stmt == elt2->stmt;
5012 }
5013
5014 /* Free the element E. */
5015
5016 static void
5017 hash_stmt_vertex_del (void *e)
5018 {
5019 free (e);
5020 }
5021
5022 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5023 statement of the loop nest, and one edge per data dependence or
5024 scalar dependence. */
5025
5026 struct graph *
5027 build_empty_rdg (int n_stmts)
5028 {
5029 int nb_data_refs = 10;
5030 struct graph *rdg = new_graph (n_stmts);
5031
5032 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5033 eq_stmt_vertex_info, hash_stmt_vertex_del);
5034 return rdg;
5035 }
5036
5037 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5038 statement of the loop nest, and one edge per data dependence or
5039 scalar dependence. */
5040
5041 struct graph *
5042 build_rdg (struct loop *loop,
5043 VEC (loop_p, heap) **loop_nest,
5044 VEC (ddr_p, heap) **dependence_relations,
5045 VEC (data_reference_p, heap) **datarefs)
5046 {
5047 struct graph *rdg = NULL;
5048 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5049
5050 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5051 dependence_relations);
5052
5053 if (known_dependences_p (*dependence_relations))
5054 {
5055 stmts_from_loop (loop, &stmts);
5056 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5057 create_rdg_vertices (rdg, stmts);
5058 create_rdg_edges (rdg, *dependence_relations);
5059 }
5060
5061 VEC_free (gimple, heap, stmts);
5062 return rdg;
5063 }
5064
5065 /* Free the reduced dependence graph RDG. */
5066
5067 void
5068 free_rdg (struct graph *rdg)
5069 {
5070 int i;
5071
5072 for (i = 0; i < rdg->n_vertices; i++)
5073 {
5074 struct vertex *v = &(rdg->vertices[i]);
5075 struct graph_edge *e;
5076
5077 for (e = v->succ; e; e = e->succ_next)
5078 free (e->data);
5079
5080 free (v->data);
5081 }
5082
5083 htab_delete (rdg->indices);
5084 free_graph (rdg);
5085 }
5086
5087 /* Initialize STMTS with all the statements of LOOP that contain a
5088 store to memory. */
5089
5090 void
5091 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5092 {
5093 unsigned int i;
5094 basic_block *bbs = get_loop_body_in_dom_order (loop);
5095
5096 for (i = 0; i < loop->num_nodes; i++)
5097 {
5098 basic_block bb = bbs[i];
5099 gimple_stmt_iterator bsi;
5100
5101 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5102 if (gimple_vdef (gsi_stmt (bsi)))
5103 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5104 }
5105
5106 free (bbs);
5107 }
5108
5109 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5110 that contains a data reference on its LHS with a stride of the same
5111 size as its unit type. */
5112
5113 bool
5114 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5115 {
5116 tree op0, op1;
5117 bool res;
5118 struct data_reference *dr;
5119
5120 if (!stmt
5121 || !gimple_vdef (stmt)
5122 || !is_gimple_assign (stmt)
5123 || !gimple_assign_single_p (stmt)
5124 || !(op1 = gimple_assign_rhs1 (stmt))
5125 || !(integer_zerop (op1) || real_zerop (op1)))
5126 return false;
5127
5128 dr = XCNEW (struct data_reference);
5129 op0 = gimple_assign_lhs (stmt);
5130
5131 DR_STMT (dr) = stmt;
5132 DR_REF (dr) = op0;
5133
5134 res = dr_analyze_innermost (dr)
5135 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5136
5137 free_data_ref (dr);
5138 return res;
5139 }
5140
5141 /* Initialize STMTS with all the statements of LOOP that contain a
5142 store to memory of the form "A[i] = 0". */
5143
5144 void
5145 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5146 {
5147 unsigned int i;
5148 basic_block bb;
5149 gimple_stmt_iterator si;
5150 gimple stmt;
5151 basic_block *bbs = get_loop_body_in_dom_order (loop);
5152
5153 for (i = 0; i < loop->num_nodes; i++)
5154 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5155 if ((stmt = gsi_stmt (si))
5156 && stmt_with_adjacent_zero_store_dr_p (stmt))
5157 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5158
5159 free (bbs);
5160 }
5161
5162 /* For a data reference REF, return the declaration of its base
5163 address or NULL_TREE if the base is not determined. */
5164
5165 static inline tree
5166 ref_base_address (gimple stmt, data_ref_loc *ref)
5167 {
5168 tree base = NULL_TREE;
5169 tree base_address;
5170 struct data_reference *dr = XCNEW (struct data_reference);
5171
5172 DR_STMT (dr) = stmt;
5173 DR_REF (dr) = *ref->pos;
5174 dr_analyze_innermost (dr);
5175 base_address = DR_BASE_ADDRESS (dr);
5176
5177 if (!base_address)
5178 goto end;
5179
5180 switch (TREE_CODE (base_address))
5181 {
5182 case ADDR_EXPR:
5183 base = TREE_OPERAND (base_address, 0);
5184 break;
5185
5186 default:
5187 base = base_address;
5188 break;
5189 }
5190
5191 end:
5192 free_data_ref (dr);
5193 return base;
5194 }
5195
5196 /* Determines whether the statement from vertex V of the RDG has a
5197 definition used outside the loop that contains this statement. */
5198
5199 bool
5200 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5201 {
5202 gimple stmt = RDG_STMT (rdg, v);
5203 struct loop *loop = loop_containing_stmt (stmt);
5204 use_operand_p imm_use_p;
5205 imm_use_iterator iterator;
5206 ssa_op_iter it;
5207 def_operand_p def_p;
5208
5209 if (!loop)
5210 return true;
5211
5212 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5213 {
5214 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5215 {
5216 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5217 return true;
5218 }
5219 }
5220
5221 return false;
5222 }
5223
5224 /* Determines whether statements S1 and S2 access to similar memory
5225 locations. Two memory accesses are considered similar when they
5226 have the same base address declaration, i.e. when their
5227 ref_base_address is the same. */
5228
5229 bool
5230 have_similar_memory_accesses (gimple s1, gimple s2)
5231 {
5232 bool res = false;
5233 unsigned i, j;
5234 VEC (data_ref_loc, heap) *refs1, *refs2;
5235 data_ref_loc *ref1, *ref2;
5236
5237 get_references_in_stmt (s1, &refs1);
5238 get_references_in_stmt (s2, &refs2);
5239
5240 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5241 {
5242 tree base1 = ref_base_address (s1, ref1);
5243
5244 if (base1)
5245 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5246 if (base1 == ref_base_address (s2, ref2))
5247 {
5248 res = true;
5249 goto end;
5250 }
5251 }
5252
5253 end:
5254 VEC_free (data_ref_loc, heap, refs1);
5255 VEC_free (data_ref_loc, heap, refs2);
5256 return res;
5257 }
5258
5259 /* Helper function for the hashtab. */
5260
5261 static int
5262 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5263 {
5264 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5265 CONST_CAST_GIMPLE ((const_gimple) s2));
5266 }
5267
5268 /* Helper function for the hashtab. */
5269
5270 static hashval_t
5271 ref_base_address_1 (const void *s)
5272 {
5273 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5274 unsigned i;
5275 VEC (data_ref_loc, heap) *refs;
5276 data_ref_loc *ref;
5277 hashval_t res = 0;
5278
5279 get_references_in_stmt (stmt, &refs);
5280
5281 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5282 if (!ref->is_read)
5283 {
5284 res = htab_hash_pointer (ref_base_address (stmt, ref));
5285 break;
5286 }
5287
5288 VEC_free (data_ref_loc, heap, refs);
5289 return res;
5290 }
5291
5292 /* Try to remove duplicated write data references from STMTS. */
5293
5294 void
5295 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5296 {
5297 unsigned i;
5298 gimple stmt;
5299 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5300 have_similar_memory_accesses_1, NULL);
5301
5302 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5303 {
5304 void **slot;
5305
5306 slot = htab_find_slot (seen, stmt, INSERT);
5307
5308 if (*slot)
5309 VEC_ordered_remove (gimple, *stmts, i);
5310 else
5311 {
5312 *slot = (void *) stmt;
5313 i++;
5314 }
5315 }
5316
5317 htab_delete (seen);
5318 }
5319
5320 /* Returns the index of PARAMETER in the parameters vector of the
5321 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5322
5323 int
5324 access_matrix_get_index_for_parameter (tree parameter,
5325 struct access_matrix *access_matrix)
5326 {
5327 int i;
5328 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5329 tree lambda_parameter;
5330
5331 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5332 if (lambda_parameter == parameter)
5333 return i + AM_NB_INDUCTION_VARS (access_matrix);
5334
5335 return -1;
5336 }