avr.c: Move definition of TARGET macros to end of file.
[gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
31
32 The goals of this analysis are:
33
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
37
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
40
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
46
47 - to define a knowledge base for storing the data dependence
48 information,
49
50 - to define an interface to access this data.
51
52
53 Definitions:
54
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
64
65 References:
66
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
73
74
75 */
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
88 #include "params.h"
89
90 static struct datadep_stats
91 {
92 int num_dependence_tests;
93 int num_dependence_dependent;
94 int num_dependence_independent;
95 int num_dependence_undetermined;
96
97 int num_subscript_tests;
98 int num_subscript_undetermined;
99 int num_same_subscript_function;
100
101 int num_ziv;
102 int num_ziv_independent;
103 int num_ziv_dependent;
104 int num_ziv_unimplemented;
105
106 int num_siv;
107 int num_siv_independent;
108 int num_siv_dependent;
109 int num_siv_unimplemented;
110
111 int num_miv;
112 int num_miv_independent;
113 int num_miv_dependent;
114 int num_miv_unimplemented;
115 } dependence_stats;
116
117 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
118 struct data_reference *,
119 struct data_reference *,
120 struct loop *);
121 /* Returns true iff A divides B. */
122
123 static inline bool
124 tree_fold_divides_p (const_tree a, const_tree b)
125 {
126 gcc_assert (TREE_CODE (a) == INTEGER_CST);
127 gcc_assert (TREE_CODE (b) == INTEGER_CST);
128 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
129 }
130
131 /* Returns true iff A divides B. */
132
133 static inline bool
134 int_divides_p (int a, int b)
135 {
136 return ((b % a) == 0);
137 }
138
139 \f
140
141 /* Dump into FILE all the data references from DATAREFS. */
142
143 void
144 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
145 {
146 unsigned int i;
147 struct data_reference *dr;
148
149 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
150 dump_data_reference (file, dr);
151 }
152
153 /* Dump into STDERR all the data references from DATAREFS. */
154
155 DEBUG_FUNCTION void
156 debug_data_references (VEC (data_reference_p, heap) *datarefs)
157 {
158 dump_data_references (stderr, datarefs);
159 }
160
161 /* Dump to STDERR all the dependence relations from DDRS. */
162
163 DEBUG_FUNCTION void
164 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
165 {
166 dump_data_dependence_relations (stderr, ddrs);
167 }
168
169 /* Dump into FILE all the dependence relations from DDRS. */
170
171 void
172 dump_data_dependence_relations (FILE *file,
173 VEC (ddr_p, heap) *ddrs)
174 {
175 unsigned int i;
176 struct data_dependence_relation *ddr;
177
178 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
179 dump_data_dependence_relation (file, ddr);
180 }
181
182 /* Print to STDERR the data_reference DR. */
183
184 DEBUG_FUNCTION void
185 debug_data_reference (struct data_reference *dr)
186 {
187 dump_data_reference (stderr, dr);
188 }
189
190 /* Dump function for a DATA_REFERENCE structure. */
191
192 void
193 dump_data_reference (FILE *outf,
194 struct data_reference *dr)
195 {
196 unsigned int i;
197
198 fprintf (outf, "#(Data Ref: \n");
199 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
200 fprintf (outf, "# stmt: ");
201 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
202 fprintf (outf, "# ref: ");
203 print_generic_stmt (outf, DR_REF (dr), 0);
204 fprintf (outf, "# base_object: ");
205 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
206
207 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
208 {
209 fprintf (outf, "# Access function %d: ", i);
210 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
211 }
212 fprintf (outf, "#)\n");
213 }
214
215 /* Dumps the affine function described by FN to the file OUTF. */
216
217 static void
218 dump_affine_function (FILE *outf, affine_fn fn)
219 {
220 unsigned i;
221 tree coef;
222
223 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
224 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
225 {
226 fprintf (outf, " + ");
227 print_generic_expr (outf, coef, TDF_SLIM);
228 fprintf (outf, " * x_%u", i);
229 }
230 }
231
232 /* Dumps the conflict function CF to the file OUTF. */
233
234 static void
235 dump_conflict_function (FILE *outf, conflict_function *cf)
236 {
237 unsigned i;
238
239 if (cf->n == NO_DEPENDENCE)
240 fprintf (outf, "no dependence\n");
241 else if (cf->n == NOT_KNOWN)
242 fprintf (outf, "not known\n");
243 else
244 {
245 for (i = 0; i < cf->n; i++)
246 {
247 fprintf (outf, "[");
248 dump_affine_function (outf, cf->fns[i]);
249 fprintf (outf, "]\n");
250 }
251 }
252 }
253
254 /* Dump function for a SUBSCRIPT structure. */
255
256 void
257 dump_subscript (FILE *outf, struct subscript *subscript)
258 {
259 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
260
261 fprintf (outf, "\n (subscript \n");
262 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
263 dump_conflict_function (outf, cf);
264 if (CF_NONTRIVIAL_P (cf))
265 {
266 tree last_iteration = SUB_LAST_CONFLICT (subscript);
267 fprintf (outf, " last_conflict: ");
268 print_generic_stmt (outf, last_iteration, 0);
269 }
270
271 cf = SUB_CONFLICTS_IN_B (subscript);
272 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
273 dump_conflict_function (outf, cf);
274 if (CF_NONTRIVIAL_P (cf))
275 {
276 tree last_iteration = SUB_LAST_CONFLICT (subscript);
277 fprintf (outf, " last_conflict: ");
278 print_generic_stmt (outf, last_iteration, 0);
279 }
280
281 fprintf (outf, " (Subscript distance: ");
282 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
283 fprintf (outf, " )\n");
284 fprintf (outf, " )\n");
285 }
286
287 /* Print the classic direction vector DIRV to OUTF. */
288
289 void
290 print_direction_vector (FILE *outf,
291 lambda_vector dirv,
292 int length)
293 {
294 int eq;
295
296 for (eq = 0; eq < length; eq++)
297 {
298 enum data_dependence_direction dir = ((enum data_dependence_direction)
299 dirv[eq]);
300
301 switch (dir)
302 {
303 case dir_positive:
304 fprintf (outf, " +");
305 break;
306 case dir_negative:
307 fprintf (outf, " -");
308 break;
309 case dir_equal:
310 fprintf (outf, " =");
311 break;
312 case dir_positive_or_equal:
313 fprintf (outf, " +=");
314 break;
315 case dir_positive_or_negative:
316 fprintf (outf, " +-");
317 break;
318 case dir_negative_or_equal:
319 fprintf (outf, " -=");
320 break;
321 case dir_star:
322 fprintf (outf, " *");
323 break;
324 default:
325 fprintf (outf, "indep");
326 break;
327 }
328 }
329 fprintf (outf, "\n");
330 }
331
332 /* Print a vector of direction vectors. */
333
334 void
335 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
336 int length)
337 {
338 unsigned j;
339 lambda_vector v;
340
341 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
342 print_direction_vector (outf, v, length);
343 }
344
345 /* Print out a vector VEC of length N to OUTFILE. */
346
347 static inline void
348 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
349 {
350 int i;
351
352 for (i = 0; i < n; i++)
353 fprintf (outfile, "%3d ", vector[i]);
354 fprintf (outfile, "\n");
355 }
356
357 /* Print a vector of distance vectors. */
358
359 void
360 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
361 int length)
362 {
363 unsigned j;
364 lambda_vector v;
365
366 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
367 print_lambda_vector (outf, v, length);
368 }
369
370 /* Debug version. */
371
372 DEBUG_FUNCTION void
373 debug_data_dependence_relation (struct data_dependence_relation *ddr)
374 {
375 dump_data_dependence_relation (stderr, ddr);
376 }
377
378 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
379
380 void
381 dump_data_dependence_relation (FILE *outf,
382 struct data_dependence_relation *ddr)
383 {
384 struct data_reference *dra, *drb;
385
386 fprintf (outf, "(Data Dep: \n");
387
388 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
389 {
390 if (ddr)
391 {
392 dra = DDR_A (ddr);
393 drb = DDR_B (ddr);
394 if (dra)
395 dump_data_reference (outf, dra);
396 else
397 fprintf (outf, " (nil)\n");
398 if (drb)
399 dump_data_reference (outf, drb);
400 else
401 fprintf (outf, " (nil)\n");
402 }
403 fprintf (outf, " (don't know)\n)\n");
404 return;
405 }
406
407 dra = DDR_A (ddr);
408 drb = DDR_B (ddr);
409 dump_data_reference (outf, dra);
410 dump_data_reference (outf, drb);
411
412 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
413 fprintf (outf, " (no dependence)\n");
414
415 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
416 {
417 unsigned int i;
418 struct loop *loopi;
419
420 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
421 {
422 fprintf (outf, " access_fn_A: ");
423 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
424 fprintf (outf, " access_fn_B: ");
425 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
426 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
427 }
428
429 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
430 fprintf (outf, " loop nest: (");
431 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
432 fprintf (outf, "%d ", loopi->num);
433 fprintf (outf, ")\n");
434
435 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
436 {
437 fprintf (outf, " distance_vector: ");
438 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
439 DDR_NB_LOOPS (ddr));
440 }
441
442 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
443 {
444 fprintf (outf, " direction_vector: ");
445 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
446 DDR_NB_LOOPS (ddr));
447 }
448 }
449
450 fprintf (outf, ")\n");
451 }
452
453 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
454
455 void
456 dump_data_dependence_direction (FILE *file,
457 enum data_dependence_direction dir)
458 {
459 switch (dir)
460 {
461 case dir_positive:
462 fprintf (file, "+");
463 break;
464
465 case dir_negative:
466 fprintf (file, "-");
467 break;
468
469 case dir_equal:
470 fprintf (file, "=");
471 break;
472
473 case dir_positive_or_negative:
474 fprintf (file, "+-");
475 break;
476
477 case dir_positive_or_equal:
478 fprintf (file, "+=");
479 break;
480
481 case dir_negative_or_equal:
482 fprintf (file, "-=");
483 break;
484
485 case dir_star:
486 fprintf (file, "*");
487 break;
488
489 default:
490 break;
491 }
492 }
493
494 /* Dumps the distance and direction vectors in FILE. DDRS contains
495 the dependence relations, and VECT_SIZE is the size of the
496 dependence vectors, or in other words the number of loops in the
497 considered nest. */
498
499 void
500 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
501 {
502 unsigned int i, j;
503 struct data_dependence_relation *ddr;
504 lambda_vector v;
505
506 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
507 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
508 {
509 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
510 {
511 fprintf (file, "DISTANCE_V (");
512 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
513 fprintf (file, ")\n");
514 }
515
516 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
517 {
518 fprintf (file, "DIRECTION_V (");
519 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
520 fprintf (file, ")\n");
521 }
522 }
523
524 fprintf (file, "\n\n");
525 }
526
527 /* Dumps the data dependence relations DDRS in FILE. */
528
529 void
530 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
531 {
532 unsigned int i;
533 struct data_dependence_relation *ddr;
534
535 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
536 dump_data_dependence_relation (file, ddr);
537
538 fprintf (file, "\n\n");
539 }
540
541 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
542 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
543 constant of type ssizetype, and returns true. If we cannot do this
544 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
545 is returned. */
546
547 static bool
548 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
549 tree *var, tree *off)
550 {
551 tree var0, var1;
552 tree off0, off1;
553 enum tree_code ocode = code;
554
555 *var = NULL_TREE;
556 *off = NULL_TREE;
557
558 switch (code)
559 {
560 case INTEGER_CST:
561 *var = build_int_cst (type, 0);
562 *off = fold_convert (ssizetype, op0);
563 return true;
564
565 case POINTER_PLUS_EXPR:
566 ocode = PLUS_EXPR;
567 /* FALLTHROUGH */
568 case PLUS_EXPR:
569 case MINUS_EXPR:
570 split_constant_offset (op0, &var0, &off0);
571 split_constant_offset (op1, &var1, &off1);
572 *var = fold_build2 (code, type, var0, var1);
573 *off = size_binop (ocode, off0, off1);
574 return true;
575
576 case MULT_EXPR:
577 if (TREE_CODE (op1) != INTEGER_CST)
578 return false;
579
580 split_constant_offset (op0, &var0, &off0);
581 *var = fold_build2 (MULT_EXPR, type, var0, op1);
582 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
583 return true;
584
585 case ADDR_EXPR:
586 {
587 tree base, poffset;
588 HOST_WIDE_INT pbitsize, pbitpos;
589 enum machine_mode pmode;
590 int punsignedp, pvolatilep;
591
592 op0 = TREE_OPERAND (op0, 0);
593 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
594 &pmode, &punsignedp, &pvolatilep, false);
595
596 if (pbitpos % BITS_PER_UNIT != 0)
597 return false;
598 base = build_fold_addr_expr (base);
599 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
600
601 if (poffset)
602 {
603 split_constant_offset (poffset, &poffset, &off1);
604 off0 = size_binop (PLUS_EXPR, off0, off1);
605 if (POINTER_TYPE_P (TREE_TYPE (base)))
606 base = fold_build_pointer_plus (base, poffset);
607 else
608 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
609 fold_convert (TREE_TYPE (base), poffset));
610 }
611
612 var0 = fold_convert (type, base);
613
614 /* If variable length types are involved, punt, otherwise casts
615 might be converted into ARRAY_REFs in gimplify_conversion.
616 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
617 possibly no longer appears in current GIMPLE, might resurface.
618 This perhaps could run
619 if (CONVERT_EXPR_P (var0))
620 {
621 gimplify_conversion (&var0);
622 // Attempt to fill in any within var0 found ARRAY_REF's
623 // element size from corresponding op embedded ARRAY_REF,
624 // if unsuccessful, just punt.
625 } */
626 while (POINTER_TYPE_P (type))
627 type = TREE_TYPE (type);
628 if (int_size_in_bytes (type) < 0)
629 return false;
630
631 *var = var0;
632 *off = off0;
633 return true;
634 }
635
636 case SSA_NAME:
637 {
638 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
639 enum tree_code subcode;
640
641 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
642 return false;
643
644 var0 = gimple_assign_rhs1 (def_stmt);
645 subcode = gimple_assign_rhs_code (def_stmt);
646 var1 = gimple_assign_rhs2 (def_stmt);
647
648 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
649 }
650 CASE_CONVERT:
651 {
652 /* We must not introduce undefined overflow, and we must not change the value.
653 Hence we're okay if the inner type doesn't overflow to start with
654 (pointer or signed), the outer type also is an integer or pointer
655 and the outer precision is at least as large as the inner. */
656 tree itype = TREE_TYPE (op0);
657 if ((POINTER_TYPE_P (itype)
658 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
659 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
660 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
661 {
662 split_constant_offset (op0, &var0, off);
663 *var = fold_convert (type, var0);
664 return true;
665 }
666 return false;
667 }
668
669 default:
670 return false;
671 }
672 }
673
674 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
675 will be ssizetype. */
676
677 void
678 split_constant_offset (tree exp, tree *var, tree *off)
679 {
680 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
681 enum tree_code code;
682
683 *var = exp;
684 *off = ssize_int (0);
685 STRIP_NOPS (exp);
686
687 if (tree_is_chrec (exp)
688 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
689 return;
690
691 otype = TREE_TYPE (exp);
692 code = TREE_CODE (exp);
693 extract_ops_from_tree (exp, &code, &op0, &op1);
694 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
695 {
696 *var = fold_convert (type, e);
697 *off = o;
698 }
699 }
700
701 /* Returns the address ADDR of an object in a canonical shape (without nop
702 casts, and with type of pointer to the object). */
703
704 static tree
705 canonicalize_base_object_address (tree addr)
706 {
707 tree orig = addr;
708
709 STRIP_NOPS (addr);
710
711 /* The base address may be obtained by casting from integer, in that case
712 keep the cast. */
713 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
714 return orig;
715
716 if (TREE_CODE (addr) != ADDR_EXPR)
717 return addr;
718
719 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
720 }
721
722 /* Analyzes the behavior of the memory reference DR in the innermost loop or
723 basic block that contains it. Returns true if analysis succeed or false
724 otherwise. */
725
726 bool
727 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
728 {
729 gimple stmt = DR_STMT (dr);
730 struct loop *loop = loop_containing_stmt (stmt);
731 tree ref = DR_REF (dr);
732 HOST_WIDE_INT pbitsize, pbitpos;
733 tree base, poffset;
734 enum machine_mode pmode;
735 int punsignedp, pvolatilep;
736 affine_iv base_iv, offset_iv;
737 tree init, dinit, step;
738 bool in_loop = (loop && loop->num);
739
740 if (dump_file && (dump_flags & TDF_DETAILS))
741 fprintf (dump_file, "analyze_innermost: ");
742
743 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
744 &pmode, &punsignedp, &pvolatilep, false);
745 gcc_assert (base != NULL_TREE);
746
747 if (pbitpos % BITS_PER_UNIT != 0)
748 {
749 if (dump_file && (dump_flags & TDF_DETAILS))
750 fprintf (dump_file, "failed: bit offset alignment.\n");
751 return false;
752 }
753
754 if (TREE_CODE (base) == MEM_REF)
755 {
756 if (!integer_zerop (TREE_OPERAND (base, 1)))
757 {
758 if (!poffset)
759 {
760 double_int moff = mem_ref_offset (base);
761 poffset = double_int_to_tree (sizetype, moff);
762 }
763 else
764 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
765 }
766 base = TREE_OPERAND (base, 0);
767 }
768 else
769 base = build_fold_addr_expr (base);
770
771 if (in_loop)
772 {
773 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
774 false))
775 {
776 if (nest)
777 {
778 if (dump_file && (dump_flags & TDF_DETAILS))
779 fprintf (dump_file, "failed: evolution of base is not"
780 " affine.\n");
781 return false;
782 }
783 else
784 {
785 base_iv.base = base;
786 base_iv.step = ssize_int (0);
787 base_iv.no_overflow = true;
788 }
789 }
790 }
791 else
792 {
793 base_iv.base = base;
794 base_iv.step = ssize_int (0);
795 base_iv.no_overflow = true;
796 }
797
798 if (!poffset)
799 {
800 offset_iv.base = ssize_int (0);
801 offset_iv.step = ssize_int (0);
802 }
803 else
804 {
805 if (!in_loop)
806 {
807 offset_iv.base = poffset;
808 offset_iv.step = ssize_int (0);
809 }
810 else if (!simple_iv (loop, loop_containing_stmt (stmt),
811 poffset, &offset_iv, false))
812 {
813 if (nest)
814 {
815 if (dump_file && (dump_flags & TDF_DETAILS))
816 fprintf (dump_file, "failed: evolution of offset is not"
817 " affine.\n");
818 return false;
819 }
820 else
821 {
822 offset_iv.base = poffset;
823 offset_iv.step = ssize_int (0);
824 }
825 }
826 }
827
828 init = ssize_int (pbitpos / BITS_PER_UNIT);
829 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
830 init = size_binop (PLUS_EXPR, init, dinit);
831 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
832 init = size_binop (PLUS_EXPR, init, dinit);
833
834 step = size_binop (PLUS_EXPR,
835 fold_convert (ssizetype, base_iv.step),
836 fold_convert (ssizetype, offset_iv.step));
837
838 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
839
840 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
841 DR_INIT (dr) = init;
842 DR_STEP (dr) = step;
843
844 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
845
846 if (dump_file && (dump_flags & TDF_DETAILS))
847 fprintf (dump_file, "success.\n");
848
849 return true;
850 }
851
852 /* Determines the base object and the list of indices of memory reference
853 DR, analyzed in LOOP and instantiated in loop nest NEST. */
854
855 static void
856 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
857 {
858 VEC (tree, heap) *access_fns = NULL;
859 tree ref, *aref, op;
860 tree base, off, access_fn;
861 basic_block before_loop;
862
863 /* If analyzing a basic-block there are no indices to analyze
864 and thus no access functions. */
865 if (!nest)
866 {
867 DR_BASE_OBJECT (dr) = DR_REF (dr);
868 DR_ACCESS_FNS (dr) = NULL;
869 return;
870 }
871
872 ref = unshare_expr (DR_REF (dr));
873 before_loop = block_before_loop (nest);
874
875 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
876 into a two element array with a constant index. The base is
877 then just the immediate underlying object. */
878 if (TREE_CODE (ref) == REALPART_EXPR)
879 {
880 ref = TREE_OPERAND (ref, 0);
881 VEC_safe_push (tree, heap, access_fns, integer_zero_node);
882 }
883 else if (TREE_CODE (ref) == IMAGPART_EXPR)
884 {
885 ref = TREE_OPERAND (ref, 0);
886 VEC_safe_push (tree, heap, access_fns, integer_one_node);
887 }
888
889 /* Analyze access functions of dimensions we know to be independent. */
890 aref = &ref;
891 while (handled_component_p (*aref))
892 {
893 if (TREE_CODE (*aref) == ARRAY_REF)
894 {
895 op = TREE_OPERAND (*aref, 1);
896 access_fn = analyze_scalar_evolution (loop, op);
897 access_fn = instantiate_scev (before_loop, loop, access_fn);
898 VEC_safe_push (tree, heap, access_fns, access_fn);
899 /* For ARRAY_REFs the base is the reference with the index replaced
900 by zero if we can not strip it as the outermost component. */
901 if (*aref == ref)
902 {
903 *aref = TREE_OPERAND (*aref, 0);
904 continue;
905 }
906 else
907 TREE_OPERAND (*aref, 1) = build_int_cst (TREE_TYPE (op), 0);
908 }
909
910 aref = &TREE_OPERAND (*aref, 0);
911 }
912
913 /* If the address operand of a MEM_REF base has an evolution in the
914 analyzed nest, add it as an additional independent access-function. */
915 if (TREE_CODE (*aref) == MEM_REF)
916 {
917 op = TREE_OPERAND (*aref, 0);
918 access_fn = analyze_scalar_evolution (loop, op);
919 access_fn = instantiate_scev (before_loop, loop, access_fn);
920 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
921 {
922 tree orig_type;
923 base = initial_condition (access_fn);
924 orig_type = TREE_TYPE (base);
925 STRIP_USELESS_TYPE_CONVERSION (base);
926 split_constant_offset (base, &base, &off);
927 /* Fold the MEM_REF offset into the evolutions initial
928 value to make more bases comparable. */
929 if (!integer_zerop (TREE_OPERAND (*aref, 1)))
930 {
931 off = size_binop (PLUS_EXPR, off,
932 fold_convert (ssizetype,
933 TREE_OPERAND (*aref, 1)));
934 TREE_OPERAND (*aref, 1)
935 = build_int_cst (TREE_TYPE (TREE_OPERAND (*aref, 1)), 0);
936 }
937 access_fn = chrec_replace_initial_condition
938 (access_fn, fold_convert (orig_type, off));
939 *aref = fold_build2_loc (EXPR_LOCATION (*aref),
940 MEM_REF, TREE_TYPE (*aref),
941 base, TREE_OPERAND (*aref, 1));
942 VEC_safe_push (tree, heap, access_fns, access_fn);
943 }
944 }
945
946 DR_BASE_OBJECT (dr) = ref;
947 DR_ACCESS_FNS (dr) = access_fns;
948 }
949
950 /* Extracts the alias analysis information from the memory reference DR. */
951
952 static void
953 dr_analyze_alias (struct data_reference *dr)
954 {
955 tree ref = DR_REF (dr);
956 tree base = get_base_address (ref), addr;
957
958 if (INDIRECT_REF_P (base)
959 || TREE_CODE (base) == MEM_REF)
960 {
961 addr = TREE_OPERAND (base, 0);
962 if (TREE_CODE (addr) == SSA_NAME)
963 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
964 }
965 }
966
967 /* Frees data reference DR. */
968
969 void
970 free_data_ref (data_reference_p dr)
971 {
972 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
973 free (dr);
974 }
975
976 /* Analyzes memory reference MEMREF accessed in STMT. The reference
977 is read if IS_READ is true, write otherwise. Returns the
978 data_reference description of MEMREF. NEST is the outermost loop
979 in which the reference should be instantiated, LOOP is the loop in
980 which the data reference should be analyzed. */
981
982 struct data_reference *
983 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
984 bool is_read)
985 {
986 struct data_reference *dr;
987
988 if (dump_file && (dump_flags & TDF_DETAILS))
989 {
990 fprintf (dump_file, "Creating dr for ");
991 print_generic_expr (dump_file, memref, TDF_SLIM);
992 fprintf (dump_file, "\n");
993 }
994
995 dr = XCNEW (struct data_reference);
996 DR_STMT (dr) = stmt;
997 DR_REF (dr) = memref;
998 DR_IS_READ (dr) = is_read;
999
1000 dr_analyze_innermost (dr, nest);
1001 dr_analyze_indices (dr, nest, loop);
1002 dr_analyze_alias (dr);
1003
1004 if (dump_file && (dump_flags & TDF_DETAILS))
1005 {
1006 unsigned i;
1007 fprintf (dump_file, "\tbase_address: ");
1008 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1009 fprintf (dump_file, "\n\toffset from base address: ");
1010 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1011 fprintf (dump_file, "\n\tconstant offset from base address: ");
1012 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1013 fprintf (dump_file, "\n\tstep: ");
1014 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1015 fprintf (dump_file, "\n\taligned to: ");
1016 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1017 fprintf (dump_file, "\n\tbase_object: ");
1018 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1019 fprintf (dump_file, "\n");
1020 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1021 {
1022 fprintf (dump_file, "\tAccess function %d: ", i);
1023 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1024 }
1025 }
1026
1027 return dr;
1028 }
1029
1030 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1031 expressions. */
1032 static bool
1033 dr_equal_offsets_p1 (tree offset1, tree offset2)
1034 {
1035 bool res;
1036
1037 STRIP_NOPS (offset1);
1038 STRIP_NOPS (offset2);
1039
1040 if (offset1 == offset2)
1041 return true;
1042
1043 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1044 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1045 return false;
1046
1047 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1048 TREE_OPERAND (offset2, 0));
1049
1050 if (!res || !BINARY_CLASS_P (offset1))
1051 return res;
1052
1053 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1054 TREE_OPERAND (offset2, 1));
1055
1056 return res;
1057 }
1058
1059 /* Check if DRA and DRB have equal offsets. */
1060 bool
1061 dr_equal_offsets_p (struct data_reference *dra,
1062 struct data_reference *drb)
1063 {
1064 tree offset1, offset2;
1065
1066 offset1 = DR_OFFSET (dra);
1067 offset2 = DR_OFFSET (drb);
1068
1069 return dr_equal_offsets_p1 (offset1, offset2);
1070 }
1071
1072 /* Returns true if FNA == FNB. */
1073
1074 static bool
1075 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1076 {
1077 unsigned i, n = VEC_length (tree, fna);
1078
1079 if (n != VEC_length (tree, fnb))
1080 return false;
1081
1082 for (i = 0; i < n; i++)
1083 if (!operand_equal_p (VEC_index (tree, fna, i),
1084 VEC_index (tree, fnb, i), 0))
1085 return false;
1086
1087 return true;
1088 }
1089
1090 /* If all the functions in CF are the same, returns one of them,
1091 otherwise returns NULL. */
1092
1093 static affine_fn
1094 common_affine_function (conflict_function *cf)
1095 {
1096 unsigned i;
1097 affine_fn comm;
1098
1099 if (!CF_NONTRIVIAL_P (cf))
1100 return NULL;
1101
1102 comm = cf->fns[0];
1103
1104 for (i = 1; i < cf->n; i++)
1105 if (!affine_function_equal_p (comm, cf->fns[i]))
1106 return NULL;
1107
1108 return comm;
1109 }
1110
1111 /* Returns the base of the affine function FN. */
1112
1113 static tree
1114 affine_function_base (affine_fn fn)
1115 {
1116 return VEC_index (tree, fn, 0);
1117 }
1118
1119 /* Returns true if FN is a constant. */
1120
1121 static bool
1122 affine_function_constant_p (affine_fn fn)
1123 {
1124 unsigned i;
1125 tree coef;
1126
1127 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1128 if (!integer_zerop (coef))
1129 return false;
1130
1131 return true;
1132 }
1133
1134 /* Returns true if FN is the zero constant function. */
1135
1136 static bool
1137 affine_function_zero_p (affine_fn fn)
1138 {
1139 return (integer_zerop (affine_function_base (fn))
1140 && affine_function_constant_p (fn));
1141 }
1142
1143 /* Returns a signed integer type with the largest precision from TA
1144 and TB. */
1145
1146 static tree
1147 signed_type_for_types (tree ta, tree tb)
1148 {
1149 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1150 return signed_type_for (ta);
1151 else
1152 return signed_type_for (tb);
1153 }
1154
1155 /* Applies operation OP on affine functions FNA and FNB, and returns the
1156 result. */
1157
1158 static affine_fn
1159 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1160 {
1161 unsigned i, n, m;
1162 affine_fn ret;
1163 tree coef;
1164
1165 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1166 {
1167 n = VEC_length (tree, fna);
1168 m = VEC_length (tree, fnb);
1169 }
1170 else
1171 {
1172 n = VEC_length (tree, fnb);
1173 m = VEC_length (tree, fna);
1174 }
1175
1176 ret = VEC_alloc (tree, heap, m);
1177 for (i = 0; i < n; i++)
1178 {
1179 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1180 TREE_TYPE (VEC_index (tree, fnb, i)));
1181
1182 VEC_quick_push (tree, ret,
1183 fold_build2 (op, type,
1184 VEC_index (tree, fna, i),
1185 VEC_index (tree, fnb, i)));
1186 }
1187
1188 for (; VEC_iterate (tree, fna, i, coef); i++)
1189 VEC_quick_push (tree, ret,
1190 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1191 coef, integer_zero_node));
1192 for (; VEC_iterate (tree, fnb, i, coef); i++)
1193 VEC_quick_push (tree, ret,
1194 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1195 integer_zero_node, coef));
1196
1197 return ret;
1198 }
1199
1200 /* Returns the sum of affine functions FNA and FNB. */
1201
1202 static affine_fn
1203 affine_fn_plus (affine_fn fna, affine_fn fnb)
1204 {
1205 return affine_fn_op (PLUS_EXPR, fna, fnb);
1206 }
1207
1208 /* Returns the difference of affine functions FNA and FNB. */
1209
1210 static affine_fn
1211 affine_fn_minus (affine_fn fna, affine_fn fnb)
1212 {
1213 return affine_fn_op (MINUS_EXPR, fna, fnb);
1214 }
1215
1216 /* Frees affine function FN. */
1217
1218 static void
1219 affine_fn_free (affine_fn fn)
1220 {
1221 VEC_free (tree, heap, fn);
1222 }
1223
1224 /* Determine for each subscript in the data dependence relation DDR
1225 the distance. */
1226
1227 static void
1228 compute_subscript_distance (struct data_dependence_relation *ddr)
1229 {
1230 conflict_function *cf_a, *cf_b;
1231 affine_fn fn_a, fn_b, diff;
1232
1233 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1234 {
1235 unsigned int i;
1236
1237 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1238 {
1239 struct subscript *subscript;
1240
1241 subscript = DDR_SUBSCRIPT (ddr, i);
1242 cf_a = SUB_CONFLICTS_IN_A (subscript);
1243 cf_b = SUB_CONFLICTS_IN_B (subscript);
1244
1245 fn_a = common_affine_function (cf_a);
1246 fn_b = common_affine_function (cf_b);
1247 if (!fn_a || !fn_b)
1248 {
1249 SUB_DISTANCE (subscript) = chrec_dont_know;
1250 return;
1251 }
1252 diff = affine_fn_minus (fn_a, fn_b);
1253
1254 if (affine_function_constant_p (diff))
1255 SUB_DISTANCE (subscript) = affine_function_base (diff);
1256 else
1257 SUB_DISTANCE (subscript) = chrec_dont_know;
1258
1259 affine_fn_free (diff);
1260 }
1261 }
1262 }
1263
1264 /* Returns the conflict function for "unknown". */
1265
1266 static conflict_function *
1267 conflict_fn_not_known (void)
1268 {
1269 conflict_function *fn = XCNEW (conflict_function);
1270 fn->n = NOT_KNOWN;
1271
1272 return fn;
1273 }
1274
1275 /* Returns the conflict function for "independent". */
1276
1277 static conflict_function *
1278 conflict_fn_no_dependence (void)
1279 {
1280 conflict_function *fn = XCNEW (conflict_function);
1281 fn->n = NO_DEPENDENCE;
1282
1283 return fn;
1284 }
1285
1286 /* Returns true if the address of OBJ is invariant in LOOP. */
1287
1288 static bool
1289 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1290 {
1291 while (handled_component_p (obj))
1292 {
1293 if (TREE_CODE (obj) == ARRAY_REF)
1294 {
1295 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1296 need to check the stride and the lower bound of the reference. */
1297 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1298 loop->num)
1299 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1300 loop->num))
1301 return false;
1302 }
1303 else if (TREE_CODE (obj) == COMPONENT_REF)
1304 {
1305 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1306 loop->num))
1307 return false;
1308 }
1309 obj = TREE_OPERAND (obj, 0);
1310 }
1311
1312 if (!INDIRECT_REF_P (obj)
1313 && TREE_CODE (obj) != MEM_REF)
1314 return true;
1315
1316 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1317 loop->num);
1318 }
1319
1320 /* Returns false if we can prove that data references A and B do not alias,
1321 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1322 considered. */
1323
1324 bool
1325 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1326 bool loop_nest)
1327 {
1328 tree addr_a = DR_BASE_OBJECT (a);
1329 tree addr_b = DR_BASE_OBJECT (b);
1330
1331 /* If we are not processing a loop nest but scalar code we
1332 do not need to care about possible cross-iteration dependences
1333 and thus can process the full original reference. Do so,
1334 similar to how loop invariant motion applies extra offset-based
1335 disambiguation. */
1336 if (!loop_nest)
1337 {
1338 aff_tree off1, off2;
1339 double_int size1, size2;
1340 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1341 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1342 aff_combination_scale (&off1, double_int_minus_one);
1343 aff_combination_add (&off2, &off1);
1344 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1345 return false;
1346 }
1347
1348 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1349 return refs_output_dependent_p (addr_a, addr_b);
1350 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1351 return refs_anti_dependent_p (addr_a, addr_b);
1352 return refs_may_alias_p (addr_a, addr_b);
1353 }
1354
1355 /* Initialize a data dependence relation between data accesses A and
1356 B. NB_LOOPS is the number of loops surrounding the references: the
1357 size of the classic distance/direction vectors. */
1358
1359 struct data_dependence_relation *
1360 initialize_data_dependence_relation (struct data_reference *a,
1361 struct data_reference *b,
1362 VEC (loop_p, heap) *loop_nest)
1363 {
1364 struct data_dependence_relation *res;
1365 unsigned int i;
1366
1367 res = XNEW (struct data_dependence_relation);
1368 DDR_A (res) = a;
1369 DDR_B (res) = b;
1370 DDR_LOOP_NEST (res) = NULL;
1371 DDR_REVERSED_P (res) = false;
1372 DDR_SUBSCRIPTS (res) = NULL;
1373 DDR_DIR_VECTS (res) = NULL;
1374 DDR_DIST_VECTS (res) = NULL;
1375
1376 if (a == NULL || b == NULL)
1377 {
1378 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1379 return res;
1380 }
1381
1382 /* If the data references do not alias, then they are independent. */
1383 if (!dr_may_alias_p (a, b, loop_nest != NULL))
1384 {
1385 DDR_ARE_DEPENDENT (res) = chrec_known;
1386 return res;
1387 }
1388
1389 /* The case where the references are exactly the same. */
1390 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1391 {
1392 if (loop_nest
1393 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1394 DR_BASE_OBJECT (a)))
1395 {
1396 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1397 return res;
1398 }
1399 DDR_AFFINE_P (res) = true;
1400 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1401 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1402 DDR_LOOP_NEST (res) = loop_nest;
1403 DDR_INNER_LOOP (res) = 0;
1404 DDR_SELF_REFERENCE (res) = true;
1405 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1406 {
1407 struct subscript *subscript;
1408
1409 subscript = XNEW (struct subscript);
1410 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1411 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1412 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1413 SUB_DISTANCE (subscript) = chrec_dont_know;
1414 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1415 }
1416 return res;
1417 }
1418
1419 /* If the references do not access the same object, we do not know
1420 whether they alias or not. */
1421 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1422 {
1423 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1424 return res;
1425 }
1426
1427 /* If the base of the object is not invariant in the loop nest, we cannot
1428 analyze it. TODO -- in fact, it would suffice to record that there may
1429 be arbitrary dependences in the loops where the base object varies. */
1430 if (loop_nest
1431 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1432 DR_BASE_OBJECT (a)))
1433 {
1434 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1435 return res;
1436 }
1437
1438 /* If the number of dimensions of the access to not agree we can have
1439 a pointer access to a component of the array element type and an
1440 array access while the base-objects are still the same. Punt. */
1441 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1442 {
1443 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1444 return res;
1445 }
1446
1447 DDR_AFFINE_P (res) = true;
1448 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1449 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1450 DDR_LOOP_NEST (res) = loop_nest;
1451 DDR_INNER_LOOP (res) = 0;
1452 DDR_SELF_REFERENCE (res) = false;
1453
1454 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1455 {
1456 struct subscript *subscript;
1457
1458 subscript = XNEW (struct subscript);
1459 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1460 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1461 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1462 SUB_DISTANCE (subscript) = chrec_dont_know;
1463 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1464 }
1465
1466 return res;
1467 }
1468
1469 /* Frees memory used by the conflict function F. */
1470
1471 static void
1472 free_conflict_function (conflict_function *f)
1473 {
1474 unsigned i;
1475
1476 if (CF_NONTRIVIAL_P (f))
1477 {
1478 for (i = 0; i < f->n; i++)
1479 affine_fn_free (f->fns[i]);
1480 }
1481 free (f);
1482 }
1483
1484 /* Frees memory used by SUBSCRIPTS. */
1485
1486 static void
1487 free_subscripts (VEC (subscript_p, heap) *subscripts)
1488 {
1489 unsigned i;
1490 subscript_p s;
1491
1492 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1493 {
1494 free_conflict_function (s->conflicting_iterations_in_a);
1495 free_conflict_function (s->conflicting_iterations_in_b);
1496 free (s);
1497 }
1498 VEC_free (subscript_p, heap, subscripts);
1499 }
1500
1501 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1502 description. */
1503
1504 static inline void
1505 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1506 tree chrec)
1507 {
1508 if (dump_file && (dump_flags & TDF_DETAILS))
1509 {
1510 fprintf (dump_file, "(dependence classified: ");
1511 print_generic_expr (dump_file, chrec, 0);
1512 fprintf (dump_file, ")\n");
1513 }
1514
1515 DDR_ARE_DEPENDENT (ddr) = chrec;
1516 free_subscripts (DDR_SUBSCRIPTS (ddr));
1517 DDR_SUBSCRIPTS (ddr) = NULL;
1518 }
1519
1520 /* The dependence relation DDR cannot be represented by a distance
1521 vector. */
1522
1523 static inline void
1524 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1525 {
1526 if (dump_file && (dump_flags & TDF_DETAILS))
1527 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1528
1529 DDR_AFFINE_P (ddr) = false;
1530 }
1531
1532 \f
1533
1534 /* This section contains the classic Banerjee tests. */
1535
1536 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1537 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1538
1539 static inline bool
1540 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1541 {
1542 return (evolution_function_is_constant_p (chrec_a)
1543 && evolution_function_is_constant_p (chrec_b));
1544 }
1545
1546 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1547 variable, i.e., if the SIV (Single Index Variable) test is true. */
1548
1549 static bool
1550 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1551 {
1552 if ((evolution_function_is_constant_p (chrec_a)
1553 && evolution_function_is_univariate_p (chrec_b))
1554 || (evolution_function_is_constant_p (chrec_b)
1555 && evolution_function_is_univariate_p (chrec_a)))
1556 return true;
1557
1558 if (evolution_function_is_univariate_p (chrec_a)
1559 && evolution_function_is_univariate_p (chrec_b))
1560 {
1561 switch (TREE_CODE (chrec_a))
1562 {
1563 case POLYNOMIAL_CHREC:
1564 switch (TREE_CODE (chrec_b))
1565 {
1566 case POLYNOMIAL_CHREC:
1567 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1568 return false;
1569
1570 default:
1571 return true;
1572 }
1573
1574 default:
1575 return true;
1576 }
1577 }
1578
1579 return false;
1580 }
1581
1582 /* Creates a conflict function with N dimensions. The affine functions
1583 in each dimension follow. */
1584
1585 static conflict_function *
1586 conflict_fn (unsigned n, ...)
1587 {
1588 unsigned i;
1589 conflict_function *ret = XCNEW (conflict_function);
1590 va_list ap;
1591
1592 gcc_assert (0 < n && n <= MAX_DIM);
1593 va_start(ap, n);
1594
1595 ret->n = n;
1596 for (i = 0; i < n; i++)
1597 ret->fns[i] = va_arg (ap, affine_fn);
1598 va_end(ap);
1599
1600 return ret;
1601 }
1602
1603 /* Returns constant affine function with value CST. */
1604
1605 static affine_fn
1606 affine_fn_cst (tree cst)
1607 {
1608 affine_fn fn = VEC_alloc (tree, heap, 1);
1609 VEC_quick_push (tree, fn, cst);
1610 return fn;
1611 }
1612
1613 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1614
1615 static affine_fn
1616 affine_fn_univar (tree cst, unsigned dim, tree coef)
1617 {
1618 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1619 unsigned i;
1620
1621 gcc_assert (dim > 0);
1622 VEC_quick_push (tree, fn, cst);
1623 for (i = 1; i < dim; i++)
1624 VEC_quick_push (tree, fn, integer_zero_node);
1625 VEC_quick_push (tree, fn, coef);
1626 return fn;
1627 }
1628
1629 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1630 *OVERLAPS_B are initialized to the functions that describe the
1631 relation between the elements accessed twice by CHREC_A and
1632 CHREC_B. For k >= 0, the following property is verified:
1633
1634 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1635
1636 static void
1637 analyze_ziv_subscript (tree chrec_a,
1638 tree chrec_b,
1639 conflict_function **overlaps_a,
1640 conflict_function **overlaps_b,
1641 tree *last_conflicts)
1642 {
1643 tree type, difference;
1644 dependence_stats.num_ziv++;
1645
1646 if (dump_file && (dump_flags & TDF_DETAILS))
1647 fprintf (dump_file, "(analyze_ziv_subscript \n");
1648
1649 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1650 chrec_a = chrec_convert (type, chrec_a, NULL);
1651 chrec_b = chrec_convert (type, chrec_b, NULL);
1652 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1653
1654 switch (TREE_CODE (difference))
1655 {
1656 case INTEGER_CST:
1657 if (integer_zerop (difference))
1658 {
1659 /* The difference is equal to zero: the accessed index
1660 overlaps for each iteration in the loop. */
1661 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1662 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1663 *last_conflicts = chrec_dont_know;
1664 dependence_stats.num_ziv_dependent++;
1665 }
1666 else
1667 {
1668 /* The accesses do not overlap. */
1669 *overlaps_a = conflict_fn_no_dependence ();
1670 *overlaps_b = conflict_fn_no_dependence ();
1671 *last_conflicts = integer_zero_node;
1672 dependence_stats.num_ziv_independent++;
1673 }
1674 break;
1675
1676 default:
1677 /* We're not sure whether the indexes overlap. For the moment,
1678 conservatively answer "don't know". */
1679 if (dump_file && (dump_flags & TDF_DETAILS))
1680 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1681
1682 *overlaps_a = conflict_fn_not_known ();
1683 *overlaps_b = conflict_fn_not_known ();
1684 *last_conflicts = chrec_dont_know;
1685 dependence_stats.num_ziv_unimplemented++;
1686 break;
1687 }
1688
1689 if (dump_file && (dump_flags & TDF_DETAILS))
1690 fprintf (dump_file, ")\n");
1691 }
1692
1693 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1694 and only if it fits to the int type. If this is not the case, or the
1695 bound on the number of iterations of LOOP could not be derived, returns
1696 chrec_dont_know. */
1697
1698 static tree
1699 max_stmt_executions_tree (struct loop *loop)
1700 {
1701 double_int nit;
1702
1703 if (!max_stmt_executions (loop, true, &nit))
1704 return chrec_dont_know;
1705
1706 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1707 return chrec_dont_know;
1708
1709 return double_int_to_tree (unsigned_type_node, nit);
1710 }
1711
1712 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1713 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1714 *OVERLAPS_B are initialized to the functions that describe the
1715 relation between the elements accessed twice by CHREC_A and
1716 CHREC_B. For k >= 0, the following property is verified:
1717
1718 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1719
1720 static void
1721 analyze_siv_subscript_cst_affine (tree chrec_a,
1722 tree chrec_b,
1723 conflict_function **overlaps_a,
1724 conflict_function **overlaps_b,
1725 tree *last_conflicts)
1726 {
1727 bool value0, value1, value2;
1728 tree type, difference, tmp;
1729
1730 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1731 chrec_a = chrec_convert (type, chrec_a, NULL);
1732 chrec_b = chrec_convert (type, chrec_b, NULL);
1733 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1734
1735 if (!chrec_is_positive (initial_condition (difference), &value0))
1736 {
1737 if (dump_file && (dump_flags & TDF_DETAILS))
1738 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1739
1740 dependence_stats.num_siv_unimplemented++;
1741 *overlaps_a = conflict_fn_not_known ();
1742 *overlaps_b = conflict_fn_not_known ();
1743 *last_conflicts = chrec_dont_know;
1744 return;
1745 }
1746 else
1747 {
1748 if (value0 == false)
1749 {
1750 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1751 {
1752 if (dump_file && (dump_flags & TDF_DETAILS))
1753 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1754
1755 *overlaps_a = conflict_fn_not_known ();
1756 *overlaps_b = conflict_fn_not_known ();
1757 *last_conflicts = chrec_dont_know;
1758 dependence_stats.num_siv_unimplemented++;
1759 return;
1760 }
1761 else
1762 {
1763 if (value1 == true)
1764 {
1765 /* Example:
1766 chrec_a = 12
1767 chrec_b = {10, +, 1}
1768 */
1769
1770 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1771 {
1772 HOST_WIDE_INT numiter;
1773 struct loop *loop = get_chrec_loop (chrec_b);
1774
1775 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1776 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1777 fold_build1 (ABS_EXPR, type, difference),
1778 CHREC_RIGHT (chrec_b));
1779 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1780 *last_conflicts = integer_one_node;
1781
1782
1783 /* Perform weak-zero siv test to see if overlap is
1784 outside the loop bounds. */
1785 numiter = max_stmt_executions_int (loop, true);
1786
1787 if (numiter >= 0
1788 && compare_tree_int (tmp, numiter) > 0)
1789 {
1790 free_conflict_function (*overlaps_a);
1791 free_conflict_function (*overlaps_b);
1792 *overlaps_a = conflict_fn_no_dependence ();
1793 *overlaps_b = conflict_fn_no_dependence ();
1794 *last_conflicts = integer_zero_node;
1795 dependence_stats.num_siv_independent++;
1796 return;
1797 }
1798 dependence_stats.num_siv_dependent++;
1799 return;
1800 }
1801
1802 /* When the step does not divide the difference, there are
1803 no overlaps. */
1804 else
1805 {
1806 *overlaps_a = conflict_fn_no_dependence ();
1807 *overlaps_b = conflict_fn_no_dependence ();
1808 *last_conflicts = integer_zero_node;
1809 dependence_stats.num_siv_independent++;
1810 return;
1811 }
1812 }
1813
1814 else
1815 {
1816 /* Example:
1817 chrec_a = 12
1818 chrec_b = {10, +, -1}
1819
1820 In this case, chrec_a will not overlap with chrec_b. */
1821 *overlaps_a = conflict_fn_no_dependence ();
1822 *overlaps_b = conflict_fn_no_dependence ();
1823 *last_conflicts = integer_zero_node;
1824 dependence_stats.num_siv_independent++;
1825 return;
1826 }
1827 }
1828 }
1829 else
1830 {
1831 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1832 {
1833 if (dump_file && (dump_flags & TDF_DETAILS))
1834 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1835
1836 *overlaps_a = conflict_fn_not_known ();
1837 *overlaps_b = conflict_fn_not_known ();
1838 *last_conflicts = chrec_dont_know;
1839 dependence_stats.num_siv_unimplemented++;
1840 return;
1841 }
1842 else
1843 {
1844 if (value2 == false)
1845 {
1846 /* Example:
1847 chrec_a = 3
1848 chrec_b = {10, +, -1}
1849 */
1850 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1851 {
1852 HOST_WIDE_INT numiter;
1853 struct loop *loop = get_chrec_loop (chrec_b);
1854
1855 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1856 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1857 CHREC_RIGHT (chrec_b));
1858 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1859 *last_conflicts = integer_one_node;
1860
1861 /* Perform weak-zero siv test to see if overlap is
1862 outside the loop bounds. */
1863 numiter = max_stmt_executions_int (loop, true);
1864
1865 if (numiter >= 0
1866 && compare_tree_int (tmp, numiter) > 0)
1867 {
1868 free_conflict_function (*overlaps_a);
1869 free_conflict_function (*overlaps_b);
1870 *overlaps_a = conflict_fn_no_dependence ();
1871 *overlaps_b = conflict_fn_no_dependence ();
1872 *last_conflicts = integer_zero_node;
1873 dependence_stats.num_siv_independent++;
1874 return;
1875 }
1876 dependence_stats.num_siv_dependent++;
1877 return;
1878 }
1879
1880 /* When the step does not divide the difference, there
1881 are no overlaps. */
1882 else
1883 {
1884 *overlaps_a = conflict_fn_no_dependence ();
1885 *overlaps_b = conflict_fn_no_dependence ();
1886 *last_conflicts = integer_zero_node;
1887 dependence_stats.num_siv_independent++;
1888 return;
1889 }
1890 }
1891 else
1892 {
1893 /* Example:
1894 chrec_a = 3
1895 chrec_b = {4, +, 1}
1896
1897 In this case, chrec_a will not overlap with chrec_b. */
1898 *overlaps_a = conflict_fn_no_dependence ();
1899 *overlaps_b = conflict_fn_no_dependence ();
1900 *last_conflicts = integer_zero_node;
1901 dependence_stats.num_siv_independent++;
1902 return;
1903 }
1904 }
1905 }
1906 }
1907 }
1908
1909 /* Helper recursive function for initializing the matrix A. Returns
1910 the initial value of CHREC. */
1911
1912 static tree
1913 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1914 {
1915 gcc_assert (chrec);
1916
1917 switch (TREE_CODE (chrec))
1918 {
1919 case POLYNOMIAL_CHREC:
1920 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1921
1922 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1923 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1924
1925 case PLUS_EXPR:
1926 case MULT_EXPR:
1927 case MINUS_EXPR:
1928 {
1929 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1930 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1931
1932 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1933 }
1934
1935 case NOP_EXPR:
1936 {
1937 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1938 return chrec_convert (chrec_type (chrec), op, NULL);
1939 }
1940
1941 case BIT_NOT_EXPR:
1942 {
1943 /* Handle ~X as -1 - X. */
1944 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1945 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1946 build_int_cst (TREE_TYPE (chrec), -1), op);
1947 }
1948
1949 case INTEGER_CST:
1950 return chrec;
1951
1952 default:
1953 gcc_unreachable ();
1954 return NULL_TREE;
1955 }
1956 }
1957
1958 #define FLOOR_DIV(x,y) ((x) / (y))
1959
1960 /* Solves the special case of the Diophantine equation:
1961 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1962
1963 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1964 number of iterations that loops X and Y run. The overlaps will be
1965 constructed as evolutions in dimension DIM. */
1966
1967 static void
1968 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1969 affine_fn *overlaps_a,
1970 affine_fn *overlaps_b,
1971 tree *last_conflicts, int dim)
1972 {
1973 if (((step_a > 0 && step_b > 0)
1974 || (step_a < 0 && step_b < 0)))
1975 {
1976 int step_overlaps_a, step_overlaps_b;
1977 int gcd_steps_a_b, last_conflict, tau2;
1978
1979 gcd_steps_a_b = gcd (step_a, step_b);
1980 step_overlaps_a = step_b / gcd_steps_a_b;
1981 step_overlaps_b = step_a / gcd_steps_a_b;
1982
1983 if (niter > 0)
1984 {
1985 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1986 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1987 last_conflict = tau2;
1988 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1989 }
1990 else
1991 *last_conflicts = chrec_dont_know;
1992
1993 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1994 build_int_cst (NULL_TREE,
1995 step_overlaps_a));
1996 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1997 build_int_cst (NULL_TREE,
1998 step_overlaps_b));
1999 }
2000
2001 else
2002 {
2003 *overlaps_a = affine_fn_cst (integer_zero_node);
2004 *overlaps_b = affine_fn_cst (integer_zero_node);
2005 *last_conflicts = integer_zero_node;
2006 }
2007 }
2008
2009 /* Solves the special case of a Diophantine equation where CHREC_A is
2010 an affine bivariate function, and CHREC_B is an affine univariate
2011 function. For example,
2012
2013 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2014
2015 has the following overlapping functions:
2016
2017 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2018 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2019 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2020
2021 FORNOW: This is a specialized implementation for a case occurring in
2022 a common benchmark. Implement the general algorithm. */
2023
2024 static void
2025 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2026 conflict_function **overlaps_a,
2027 conflict_function **overlaps_b,
2028 tree *last_conflicts)
2029 {
2030 bool xz_p, yz_p, xyz_p;
2031 int step_x, step_y, step_z;
2032 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2033 affine_fn overlaps_a_xz, overlaps_b_xz;
2034 affine_fn overlaps_a_yz, overlaps_b_yz;
2035 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2036 affine_fn ova1, ova2, ovb;
2037 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2038
2039 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2040 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2041 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2042
2043 niter_x =
2044 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
2045 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2046 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2047
2048 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2049 {
2050 if (dump_file && (dump_flags & TDF_DETAILS))
2051 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2052
2053 *overlaps_a = conflict_fn_not_known ();
2054 *overlaps_b = conflict_fn_not_known ();
2055 *last_conflicts = chrec_dont_know;
2056 return;
2057 }
2058
2059 niter = MIN (niter_x, niter_z);
2060 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2061 &overlaps_a_xz,
2062 &overlaps_b_xz,
2063 &last_conflicts_xz, 1);
2064 niter = MIN (niter_y, niter_z);
2065 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2066 &overlaps_a_yz,
2067 &overlaps_b_yz,
2068 &last_conflicts_yz, 2);
2069 niter = MIN (niter_x, niter_z);
2070 niter = MIN (niter_y, niter);
2071 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2072 &overlaps_a_xyz,
2073 &overlaps_b_xyz,
2074 &last_conflicts_xyz, 3);
2075
2076 xz_p = !integer_zerop (last_conflicts_xz);
2077 yz_p = !integer_zerop (last_conflicts_yz);
2078 xyz_p = !integer_zerop (last_conflicts_xyz);
2079
2080 if (xz_p || yz_p || xyz_p)
2081 {
2082 ova1 = affine_fn_cst (integer_zero_node);
2083 ova2 = affine_fn_cst (integer_zero_node);
2084 ovb = affine_fn_cst (integer_zero_node);
2085 if (xz_p)
2086 {
2087 affine_fn t0 = ova1;
2088 affine_fn t2 = ovb;
2089
2090 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2091 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2092 affine_fn_free (t0);
2093 affine_fn_free (t2);
2094 *last_conflicts = last_conflicts_xz;
2095 }
2096 if (yz_p)
2097 {
2098 affine_fn t0 = ova2;
2099 affine_fn t2 = ovb;
2100
2101 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2102 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2103 affine_fn_free (t0);
2104 affine_fn_free (t2);
2105 *last_conflicts = last_conflicts_yz;
2106 }
2107 if (xyz_p)
2108 {
2109 affine_fn t0 = ova1;
2110 affine_fn t2 = ova2;
2111 affine_fn t4 = ovb;
2112
2113 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2114 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2115 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2116 affine_fn_free (t0);
2117 affine_fn_free (t2);
2118 affine_fn_free (t4);
2119 *last_conflicts = last_conflicts_xyz;
2120 }
2121 *overlaps_a = conflict_fn (2, ova1, ova2);
2122 *overlaps_b = conflict_fn (1, ovb);
2123 }
2124 else
2125 {
2126 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2127 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2128 *last_conflicts = integer_zero_node;
2129 }
2130
2131 affine_fn_free (overlaps_a_xz);
2132 affine_fn_free (overlaps_b_xz);
2133 affine_fn_free (overlaps_a_yz);
2134 affine_fn_free (overlaps_b_yz);
2135 affine_fn_free (overlaps_a_xyz);
2136 affine_fn_free (overlaps_b_xyz);
2137 }
2138
2139 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2140
2141 static void
2142 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2143 int size)
2144 {
2145 memcpy (vec2, vec1, size * sizeof (*vec1));
2146 }
2147
2148 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2149
2150 static void
2151 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2152 int m, int n)
2153 {
2154 int i;
2155
2156 for (i = 0; i < m; i++)
2157 lambda_vector_copy (mat1[i], mat2[i], n);
2158 }
2159
2160 /* Store the N x N identity matrix in MAT. */
2161
2162 static void
2163 lambda_matrix_id (lambda_matrix mat, int size)
2164 {
2165 int i, j;
2166
2167 for (i = 0; i < size; i++)
2168 for (j = 0; j < size; j++)
2169 mat[i][j] = (i == j) ? 1 : 0;
2170 }
2171
2172 /* Return the first nonzero element of vector VEC1 between START and N.
2173 We must have START <= N. Returns N if VEC1 is the zero vector. */
2174
2175 static int
2176 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2177 {
2178 int j = start;
2179 while (j < n && vec1[j] == 0)
2180 j++;
2181 return j;
2182 }
2183
2184 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2185 R2 = R2 + CONST1 * R1. */
2186
2187 static void
2188 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2189 {
2190 int i;
2191
2192 if (const1 == 0)
2193 return;
2194
2195 for (i = 0; i < n; i++)
2196 mat[r2][i] += const1 * mat[r1][i];
2197 }
2198
2199 /* Swap rows R1 and R2 in matrix MAT. */
2200
2201 static void
2202 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2203 {
2204 lambda_vector row;
2205
2206 row = mat[r1];
2207 mat[r1] = mat[r2];
2208 mat[r2] = row;
2209 }
2210
2211 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2212 and store the result in VEC2. */
2213
2214 static void
2215 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2216 int size, int const1)
2217 {
2218 int i;
2219
2220 if (const1 == 0)
2221 lambda_vector_clear (vec2, size);
2222 else
2223 for (i = 0; i < size; i++)
2224 vec2[i] = const1 * vec1[i];
2225 }
2226
2227 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2228
2229 static void
2230 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2231 int size)
2232 {
2233 lambda_vector_mult_const (vec1, vec2, size, -1);
2234 }
2235
2236 /* Negate row R1 of matrix MAT which has N columns. */
2237
2238 static void
2239 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2240 {
2241 lambda_vector_negate (mat[r1], mat[r1], n);
2242 }
2243
2244 /* Return true if two vectors are equal. */
2245
2246 static bool
2247 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2248 {
2249 int i;
2250 for (i = 0; i < size; i++)
2251 if (vec1[i] != vec2[i])
2252 return false;
2253 return true;
2254 }
2255
2256 /* Given an M x N integer matrix A, this function determines an M x
2257 M unimodular matrix U, and an M x N echelon matrix S such that
2258 "U.A = S". This decomposition is also known as "right Hermite".
2259
2260 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2261 Restructuring Compilers" Utpal Banerjee. */
2262
2263 static void
2264 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2265 lambda_matrix S, lambda_matrix U)
2266 {
2267 int i, j, i0 = 0;
2268
2269 lambda_matrix_copy (A, S, m, n);
2270 lambda_matrix_id (U, m);
2271
2272 for (j = 0; j < n; j++)
2273 {
2274 if (lambda_vector_first_nz (S[j], m, i0) < m)
2275 {
2276 ++i0;
2277 for (i = m - 1; i >= i0; i--)
2278 {
2279 while (S[i][j] != 0)
2280 {
2281 int sigma, factor, a, b;
2282
2283 a = S[i-1][j];
2284 b = S[i][j];
2285 sigma = (a * b < 0) ? -1: 1;
2286 a = abs (a);
2287 b = abs (b);
2288 factor = sigma * (a / b);
2289
2290 lambda_matrix_row_add (S, n, i, i-1, -factor);
2291 lambda_matrix_row_exchange (S, i, i-1);
2292
2293 lambda_matrix_row_add (U, m, i, i-1, -factor);
2294 lambda_matrix_row_exchange (U, i, i-1);
2295 }
2296 }
2297 }
2298 }
2299 }
2300
2301 /* Determines the overlapping elements due to accesses CHREC_A and
2302 CHREC_B, that are affine functions. This function cannot handle
2303 symbolic evolution functions, ie. when initial conditions are
2304 parameters, because it uses lambda matrices of integers. */
2305
2306 static void
2307 analyze_subscript_affine_affine (tree chrec_a,
2308 tree chrec_b,
2309 conflict_function **overlaps_a,
2310 conflict_function **overlaps_b,
2311 tree *last_conflicts)
2312 {
2313 unsigned nb_vars_a, nb_vars_b, dim;
2314 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2315 lambda_matrix A, U, S;
2316 struct obstack scratch_obstack;
2317
2318 if (eq_evolutions_p (chrec_a, chrec_b))
2319 {
2320 /* The accessed index overlaps for each iteration in the
2321 loop. */
2322 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2323 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2324 *last_conflicts = chrec_dont_know;
2325 return;
2326 }
2327 if (dump_file && (dump_flags & TDF_DETAILS))
2328 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2329
2330 /* For determining the initial intersection, we have to solve a
2331 Diophantine equation. This is the most time consuming part.
2332
2333 For answering to the question: "Is there a dependence?" we have
2334 to prove that there exists a solution to the Diophantine
2335 equation, and that the solution is in the iteration domain,
2336 i.e. the solution is positive or zero, and that the solution
2337 happens before the upper bound loop.nb_iterations. Otherwise
2338 there is no dependence. This function outputs a description of
2339 the iterations that hold the intersections. */
2340
2341 nb_vars_a = nb_vars_in_chrec (chrec_a);
2342 nb_vars_b = nb_vars_in_chrec (chrec_b);
2343
2344 gcc_obstack_init (&scratch_obstack);
2345
2346 dim = nb_vars_a + nb_vars_b;
2347 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2348 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2349 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2350
2351 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2352 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2353 gamma = init_b - init_a;
2354
2355 /* Don't do all the hard work of solving the Diophantine equation
2356 when we already know the solution: for example,
2357 | {3, +, 1}_1
2358 | {3, +, 4}_2
2359 | gamma = 3 - 3 = 0.
2360 Then the first overlap occurs during the first iterations:
2361 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2362 */
2363 if (gamma == 0)
2364 {
2365 if (nb_vars_a == 1 && nb_vars_b == 1)
2366 {
2367 HOST_WIDE_INT step_a, step_b;
2368 HOST_WIDE_INT niter, niter_a, niter_b;
2369 affine_fn ova, ovb;
2370
2371 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2372 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2373 niter = MIN (niter_a, niter_b);
2374 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2375 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2376
2377 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2378 &ova, &ovb,
2379 last_conflicts, 1);
2380 *overlaps_a = conflict_fn (1, ova);
2381 *overlaps_b = conflict_fn (1, ovb);
2382 }
2383
2384 else if (nb_vars_a == 2 && nb_vars_b == 1)
2385 compute_overlap_steps_for_affine_1_2
2386 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2387
2388 else if (nb_vars_a == 1 && nb_vars_b == 2)
2389 compute_overlap_steps_for_affine_1_2
2390 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2391
2392 else
2393 {
2394 if (dump_file && (dump_flags & TDF_DETAILS))
2395 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2396 *overlaps_a = conflict_fn_not_known ();
2397 *overlaps_b = conflict_fn_not_known ();
2398 *last_conflicts = chrec_dont_know;
2399 }
2400 goto end_analyze_subs_aa;
2401 }
2402
2403 /* U.A = S */
2404 lambda_matrix_right_hermite (A, dim, 1, S, U);
2405
2406 if (S[0][0] < 0)
2407 {
2408 S[0][0] *= -1;
2409 lambda_matrix_row_negate (U, dim, 0);
2410 }
2411 gcd_alpha_beta = S[0][0];
2412
2413 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2414 but that is a quite strange case. Instead of ICEing, answer
2415 don't know. */
2416 if (gcd_alpha_beta == 0)
2417 {
2418 *overlaps_a = conflict_fn_not_known ();
2419 *overlaps_b = conflict_fn_not_known ();
2420 *last_conflicts = chrec_dont_know;
2421 goto end_analyze_subs_aa;
2422 }
2423
2424 /* The classic "gcd-test". */
2425 if (!int_divides_p (gcd_alpha_beta, gamma))
2426 {
2427 /* The "gcd-test" has determined that there is no integer
2428 solution, i.e. there is no dependence. */
2429 *overlaps_a = conflict_fn_no_dependence ();
2430 *overlaps_b = conflict_fn_no_dependence ();
2431 *last_conflicts = integer_zero_node;
2432 }
2433
2434 /* Both access functions are univariate. This includes SIV and MIV cases. */
2435 else if (nb_vars_a == 1 && nb_vars_b == 1)
2436 {
2437 /* Both functions should have the same evolution sign. */
2438 if (((A[0][0] > 0 && -A[1][0] > 0)
2439 || (A[0][0] < 0 && -A[1][0] < 0)))
2440 {
2441 /* The solutions are given by:
2442 |
2443 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2444 | [u21 u22] [y0]
2445
2446 For a given integer t. Using the following variables,
2447
2448 | i0 = u11 * gamma / gcd_alpha_beta
2449 | j0 = u12 * gamma / gcd_alpha_beta
2450 | i1 = u21
2451 | j1 = u22
2452
2453 the solutions are:
2454
2455 | x0 = i0 + i1 * t,
2456 | y0 = j0 + j1 * t. */
2457 HOST_WIDE_INT i0, j0, i1, j1;
2458
2459 i0 = U[0][0] * gamma / gcd_alpha_beta;
2460 j0 = U[0][1] * gamma / gcd_alpha_beta;
2461 i1 = U[1][0];
2462 j1 = U[1][1];
2463
2464 if ((i1 == 0 && i0 < 0)
2465 || (j1 == 0 && j0 < 0))
2466 {
2467 /* There is no solution.
2468 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2469 falls in here, but for the moment we don't look at the
2470 upper bound of the iteration domain. */
2471 *overlaps_a = conflict_fn_no_dependence ();
2472 *overlaps_b = conflict_fn_no_dependence ();
2473 *last_conflicts = integer_zero_node;
2474 goto end_analyze_subs_aa;
2475 }
2476
2477 if (i1 > 0 && j1 > 0)
2478 {
2479 HOST_WIDE_INT niter_a = max_stmt_executions_int
2480 (get_chrec_loop (chrec_a), true);
2481 HOST_WIDE_INT niter_b = max_stmt_executions_int
2482 (get_chrec_loop (chrec_b), true);
2483 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2484
2485 /* (X0, Y0) is a solution of the Diophantine equation:
2486 "chrec_a (X0) = chrec_b (Y0)". */
2487 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2488 CEIL (-j0, j1));
2489 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2490 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2491
2492 /* (X1, Y1) is the smallest positive solution of the eq
2493 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2494 first conflict occurs. */
2495 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2496 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2497 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2498
2499 if (niter > 0)
2500 {
2501 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2502 FLOOR_DIV (niter - j0, j1));
2503 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2504
2505 /* If the overlap occurs outside of the bounds of the
2506 loop, there is no dependence. */
2507 if (x1 >= niter || y1 >= niter)
2508 {
2509 *overlaps_a = conflict_fn_no_dependence ();
2510 *overlaps_b = conflict_fn_no_dependence ();
2511 *last_conflicts = integer_zero_node;
2512 goto end_analyze_subs_aa;
2513 }
2514 else
2515 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2516 }
2517 else
2518 *last_conflicts = chrec_dont_know;
2519
2520 *overlaps_a
2521 = conflict_fn (1,
2522 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2523 1,
2524 build_int_cst (NULL_TREE, i1)));
2525 *overlaps_b
2526 = conflict_fn (1,
2527 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2528 1,
2529 build_int_cst (NULL_TREE, j1)));
2530 }
2531 else
2532 {
2533 /* FIXME: For the moment, the upper bound of the
2534 iteration domain for i and j is not checked. */
2535 if (dump_file && (dump_flags & TDF_DETAILS))
2536 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2537 *overlaps_a = conflict_fn_not_known ();
2538 *overlaps_b = conflict_fn_not_known ();
2539 *last_conflicts = chrec_dont_know;
2540 }
2541 }
2542 else
2543 {
2544 if (dump_file && (dump_flags & TDF_DETAILS))
2545 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2546 *overlaps_a = conflict_fn_not_known ();
2547 *overlaps_b = conflict_fn_not_known ();
2548 *last_conflicts = chrec_dont_know;
2549 }
2550 }
2551 else
2552 {
2553 if (dump_file && (dump_flags & TDF_DETAILS))
2554 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2555 *overlaps_a = conflict_fn_not_known ();
2556 *overlaps_b = conflict_fn_not_known ();
2557 *last_conflicts = chrec_dont_know;
2558 }
2559
2560 end_analyze_subs_aa:
2561 obstack_free (&scratch_obstack, NULL);
2562 if (dump_file && (dump_flags & TDF_DETAILS))
2563 {
2564 fprintf (dump_file, " (overlaps_a = ");
2565 dump_conflict_function (dump_file, *overlaps_a);
2566 fprintf (dump_file, ")\n (overlaps_b = ");
2567 dump_conflict_function (dump_file, *overlaps_b);
2568 fprintf (dump_file, ")\n");
2569 fprintf (dump_file, ")\n");
2570 }
2571 }
2572
2573 /* Returns true when analyze_subscript_affine_affine can be used for
2574 determining the dependence relation between chrec_a and chrec_b,
2575 that contain symbols. This function modifies chrec_a and chrec_b
2576 such that the analysis result is the same, and such that they don't
2577 contain symbols, and then can safely be passed to the analyzer.
2578
2579 Example: The analysis of the following tuples of evolutions produce
2580 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2581 vs. {0, +, 1}_1
2582
2583 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2584 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2585 */
2586
2587 static bool
2588 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2589 {
2590 tree diff, type, left_a, left_b, right_b;
2591
2592 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2593 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2594 /* FIXME: For the moment not handled. Might be refined later. */
2595 return false;
2596
2597 type = chrec_type (*chrec_a);
2598 left_a = CHREC_LEFT (*chrec_a);
2599 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2600 diff = chrec_fold_minus (type, left_a, left_b);
2601
2602 if (!evolution_function_is_constant_p (diff))
2603 return false;
2604
2605 if (dump_file && (dump_flags & TDF_DETAILS))
2606 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2607
2608 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2609 diff, CHREC_RIGHT (*chrec_a));
2610 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2611 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2612 build_int_cst (type, 0),
2613 right_b);
2614 return true;
2615 }
2616
2617 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2618 *OVERLAPS_B are initialized to the functions that describe the
2619 relation between the elements accessed twice by CHREC_A and
2620 CHREC_B. For k >= 0, the following property is verified:
2621
2622 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2623
2624 static void
2625 analyze_siv_subscript (tree chrec_a,
2626 tree chrec_b,
2627 conflict_function **overlaps_a,
2628 conflict_function **overlaps_b,
2629 tree *last_conflicts,
2630 int loop_nest_num)
2631 {
2632 dependence_stats.num_siv++;
2633
2634 if (dump_file && (dump_flags & TDF_DETAILS))
2635 fprintf (dump_file, "(analyze_siv_subscript \n");
2636
2637 if (evolution_function_is_constant_p (chrec_a)
2638 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2639 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2640 overlaps_a, overlaps_b, last_conflicts);
2641
2642 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2643 && evolution_function_is_constant_p (chrec_b))
2644 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2645 overlaps_b, overlaps_a, last_conflicts);
2646
2647 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2648 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2649 {
2650 if (!chrec_contains_symbols (chrec_a)
2651 && !chrec_contains_symbols (chrec_b))
2652 {
2653 analyze_subscript_affine_affine (chrec_a, chrec_b,
2654 overlaps_a, overlaps_b,
2655 last_conflicts);
2656
2657 if (CF_NOT_KNOWN_P (*overlaps_a)
2658 || CF_NOT_KNOWN_P (*overlaps_b))
2659 dependence_stats.num_siv_unimplemented++;
2660 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2661 || CF_NO_DEPENDENCE_P (*overlaps_b))
2662 dependence_stats.num_siv_independent++;
2663 else
2664 dependence_stats.num_siv_dependent++;
2665 }
2666 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2667 &chrec_b))
2668 {
2669 analyze_subscript_affine_affine (chrec_a, chrec_b,
2670 overlaps_a, overlaps_b,
2671 last_conflicts);
2672
2673 if (CF_NOT_KNOWN_P (*overlaps_a)
2674 || CF_NOT_KNOWN_P (*overlaps_b))
2675 dependence_stats.num_siv_unimplemented++;
2676 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2677 || CF_NO_DEPENDENCE_P (*overlaps_b))
2678 dependence_stats.num_siv_independent++;
2679 else
2680 dependence_stats.num_siv_dependent++;
2681 }
2682 else
2683 goto siv_subscript_dontknow;
2684 }
2685
2686 else
2687 {
2688 siv_subscript_dontknow:;
2689 if (dump_file && (dump_flags & TDF_DETAILS))
2690 fprintf (dump_file, "siv test failed: unimplemented.\n");
2691 *overlaps_a = conflict_fn_not_known ();
2692 *overlaps_b = conflict_fn_not_known ();
2693 *last_conflicts = chrec_dont_know;
2694 dependence_stats.num_siv_unimplemented++;
2695 }
2696
2697 if (dump_file && (dump_flags & TDF_DETAILS))
2698 fprintf (dump_file, ")\n");
2699 }
2700
2701 /* Returns false if we can prove that the greatest common divisor of the steps
2702 of CHREC does not divide CST, false otherwise. */
2703
2704 static bool
2705 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2706 {
2707 HOST_WIDE_INT cd = 0, val;
2708 tree step;
2709
2710 if (!host_integerp (cst, 0))
2711 return true;
2712 val = tree_low_cst (cst, 0);
2713
2714 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2715 {
2716 step = CHREC_RIGHT (chrec);
2717 if (!host_integerp (step, 0))
2718 return true;
2719 cd = gcd (cd, tree_low_cst (step, 0));
2720 chrec = CHREC_LEFT (chrec);
2721 }
2722
2723 return val % cd == 0;
2724 }
2725
2726 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2727 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2728 functions that describe the relation between the elements accessed
2729 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2730 is verified:
2731
2732 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2733
2734 static void
2735 analyze_miv_subscript (tree chrec_a,
2736 tree chrec_b,
2737 conflict_function **overlaps_a,
2738 conflict_function **overlaps_b,
2739 tree *last_conflicts,
2740 struct loop *loop_nest)
2741 {
2742 tree type, difference;
2743
2744 dependence_stats.num_miv++;
2745 if (dump_file && (dump_flags & TDF_DETAILS))
2746 fprintf (dump_file, "(analyze_miv_subscript \n");
2747
2748 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2749 chrec_a = chrec_convert (type, chrec_a, NULL);
2750 chrec_b = chrec_convert (type, chrec_b, NULL);
2751 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2752
2753 if (eq_evolutions_p (chrec_a, chrec_b))
2754 {
2755 /* Access functions are the same: all the elements are accessed
2756 in the same order. */
2757 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2758 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2759 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2760 dependence_stats.num_miv_dependent++;
2761 }
2762
2763 else if (evolution_function_is_constant_p (difference)
2764 /* For the moment, the following is verified:
2765 evolution_function_is_affine_multivariate_p (chrec_a,
2766 loop_nest->num) */
2767 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2768 {
2769 /* testsuite/.../ssa-chrec-33.c
2770 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2771
2772 The difference is 1, and all the evolution steps are multiples
2773 of 2, consequently there are no overlapping elements. */
2774 *overlaps_a = conflict_fn_no_dependence ();
2775 *overlaps_b = conflict_fn_no_dependence ();
2776 *last_conflicts = integer_zero_node;
2777 dependence_stats.num_miv_independent++;
2778 }
2779
2780 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2781 && !chrec_contains_symbols (chrec_a)
2782 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2783 && !chrec_contains_symbols (chrec_b))
2784 {
2785 /* testsuite/.../ssa-chrec-35.c
2786 {0, +, 1}_2 vs. {0, +, 1}_3
2787 the overlapping elements are respectively located at iterations:
2788 {0, +, 1}_x and {0, +, 1}_x,
2789 in other words, we have the equality:
2790 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2791
2792 Other examples:
2793 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2794 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2795
2796 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2797 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2798 */
2799 analyze_subscript_affine_affine (chrec_a, chrec_b,
2800 overlaps_a, overlaps_b, last_conflicts);
2801
2802 if (CF_NOT_KNOWN_P (*overlaps_a)
2803 || CF_NOT_KNOWN_P (*overlaps_b))
2804 dependence_stats.num_miv_unimplemented++;
2805 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2806 || CF_NO_DEPENDENCE_P (*overlaps_b))
2807 dependence_stats.num_miv_independent++;
2808 else
2809 dependence_stats.num_miv_dependent++;
2810 }
2811
2812 else
2813 {
2814 /* When the analysis is too difficult, answer "don't know". */
2815 if (dump_file && (dump_flags & TDF_DETAILS))
2816 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2817
2818 *overlaps_a = conflict_fn_not_known ();
2819 *overlaps_b = conflict_fn_not_known ();
2820 *last_conflicts = chrec_dont_know;
2821 dependence_stats.num_miv_unimplemented++;
2822 }
2823
2824 if (dump_file && (dump_flags & TDF_DETAILS))
2825 fprintf (dump_file, ")\n");
2826 }
2827
2828 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2829 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2830 OVERLAP_ITERATIONS_B are initialized with two functions that
2831 describe the iterations that contain conflicting elements.
2832
2833 Remark: For an integer k >= 0, the following equality is true:
2834
2835 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2836 */
2837
2838 static void
2839 analyze_overlapping_iterations (tree chrec_a,
2840 tree chrec_b,
2841 conflict_function **overlap_iterations_a,
2842 conflict_function **overlap_iterations_b,
2843 tree *last_conflicts, struct loop *loop_nest)
2844 {
2845 unsigned int lnn = loop_nest->num;
2846
2847 dependence_stats.num_subscript_tests++;
2848
2849 if (dump_file && (dump_flags & TDF_DETAILS))
2850 {
2851 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2852 fprintf (dump_file, " (chrec_a = ");
2853 print_generic_expr (dump_file, chrec_a, 0);
2854 fprintf (dump_file, ")\n (chrec_b = ");
2855 print_generic_expr (dump_file, chrec_b, 0);
2856 fprintf (dump_file, ")\n");
2857 }
2858
2859 if (chrec_a == NULL_TREE
2860 || chrec_b == NULL_TREE
2861 || chrec_contains_undetermined (chrec_a)
2862 || chrec_contains_undetermined (chrec_b))
2863 {
2864 dependence_stats.num_subscript_undetermined++;
2865
2866 *overlap_iterations_a = conflict_fn_not_known ();
2867 *overlap_iterations_b = conflict_fn_not_known ();
2868 }
2869
2870 /* If they are the same chrec, and are affine, they overlap
2871 on every iteration. */
2872 else if (eq_evolutions_p (chrec_a, chrec_b)
2873 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2874 || operand_equal_p (chrec_a, chrec_b, 0)))
2875 {
2876 dependence_stats.num_same_subscript_function++;
2877 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2878 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2879 *last_conflicts = chrec_dont_know;
2880 }
2881
2882 /* If they aren't the same, and aren't affine, we can't do anything
2883 yet. */
2884 else if ((chrec_contains_symbols (chrec_a)
2885 || chrec_contains_symbols (chrec_b))
2886 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2887 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2888 {
2889 dependence_stats.num_subscript_undetermined++;
2890 *overlap_iterations_a = conflict_fn_not_known ();
2891 *overlap_iterations_b = conflict_fn_not_known ();
2892 }
2893
2894 else if (ziv_subscript_p (chrec_a, chrec_b))
2895 analyze_ziv_subscript (chrec_a, chrec_b,
2896 overlap_iterations_a, overlap_iterations_b,
2897 last_conflicts);
2898
2899 else if (siv_subscript_p (chrec_a, chrec_b))
2900 analyze_siv_subscript (chrec_a, chrec_b,
2901 overlap_iterations_a, overlap_iterations_b,
2902 last_conflicts, lnn);
2903
2904 else
2905 analyze_miv_subscript (chrec_a, chrec_b,
2906 overlap_iterations_a, overlap_iterations_b,
2907 last_conflicts, loop_nest);
2908
2909 if (dump_file && (dump_flags & TDF_DETAILS))
2910 {
2911 fprintf (dump_file, " (overlap_iterations_a = ");
2912 dump_conflict_function (dump_file, *overlap_iterations_a);
2913 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2914 dump_conflict_function (dump_file, *overlap_iterations_b);
2915 fprintf (dump_file, ")\n");
2916 fprintf (dump_file, ")\n");
2917 }
2918 }
2919
2920 /* Helper function for uniquely inserting distance vectors. */
2921
2922 static void
2923 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2924 {
2925 unsigned i;
2926 lambda_vector v;
2927
2928 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2929 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2930 return;
2931
2932 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2933 }
2934
2935 /* Helper function for uniquely inserting direction vectors. */
2936
2937 static void
2938 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2939 {
2940 unsigned i;
2941 lambda_vector v;
2942
2943 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2944 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2945 return;
2946
2947 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2948 }
2949
2950 /* Add a distance of 1 on all the loops outer than INDEX. If we
2951 haven't yet determined a distance for this outer loop, push a new
2952 distance vector composed of the previous distance, and a distance
2953 of 1 for this outer loop. Example:
2954
2955 | loop_1
2956 | loop_2
2957 | A[10]
2958 | endloop_2
2959 | endloop_1
2960
2961 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2962 save (0, 1), then we have to save (1, 0). */
2963
2964 static void
2965 add_outer_distances (struct data_dependence_relation *ddr,
2966 lambda_vector dist_v, int index)
2967 {
2968 /* For each outer loop where init_v is not set, the accesses are
2969 in dependence of distance 1 in the loop. */
2970 while (--index >= 0)
2971 {
2972 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2973 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2974 save_v[index] = 1;
2975 save_dist_v (ddr, save_v);
2976 }
2977 }
2978
2979 /* Return false when fail to represent the data dependence as a
2980 distance vector. INIT_B is set to true when a component has been
2981 added to the distance vector DIST_V. INDEX_CARRY is then set to
2982 the index in DIST_V that carries the dependence. */
2983
2984 static bool
2985 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2986 struct data_reference *ddr_a,
2987 struct data_reference *ddr_b,
2988 lambda_vector dist_v, bool *init_b,
2989 int *index_carry)
2990 {
2991 unsigned i;
2992 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2993
2994 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2995 {
2996 tree access_fn_a, access_fn_b;
2997 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2998
2999 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3000 {
3001 non_affine_dependence_relation (ddr);
3002 return false;
3003 }
3004
3005 access_fn_a = DR_ACCESS_FN (ddr_a, i);
3006 access_fn_b = DR_ACCESS_FN (ddr_b, i);
3007
3008 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3009 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3010 {
3011 int dist, index;
3012 int var_a = CHREC_VARIABLE (access_fn_a);
3013 int var_b = CHREC_VARIABLE (access_fn_b);
3014
3015 if (var_a != var_b
3016 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3017 {
3018 non_affine_dependence_relation (ddr);
3019 return false;
3020 }
3021
3022 dist = int_cst_value (SUB_DISTANCE (subscript));
3023 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3024 *index_carry = MIN (index, *index_carry);
3025
3026 /* This is the subscript coupling test. If we have already
3027 recorded a distance for this loop (a distance coming from
3028 another subscript), it should be the same. For example,
3029 in the following code, there is no dependence:
3030
3031 | loop i = 0, N, 1
3032 | T[i+1][i] = ...
3033 | ... = T[i][i]
3034 | endloop
3035 */
3036 if (init_v[index] != 0 && dist_v[index] != dist)
3037 {
3038 finalize_ddr_dependent (ddr, chrec_known);
3039 return false;
3040 }
3041
3042 dist_v[index] = dist;
3043 init_v[index] = 1;
3044 *init_b = true;
3045 }
3046 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3047 {
3048 /* This can be for example an affine vs. constant dependence
3049 (T[i] vs. T[3]) that is not an affine dependence and is
3050 not representable as a distance vector. */
3051 non_affine_dependence_relation (ddr);
3052 return false;
3053 }
3054 }
3055
3056 return true;
3057 }
3058
3059 /* Return true when the DDR contains only constant access functions. */
3060
3061 static bool
3062 constant_access_functions (const struct data_dependence_relation *ddr)
3063 {
3064 unsigned i;
3065
3066 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3067 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3068 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3069 return false;
3070
3071 return true;
3072 }
3073
3074 /* Helper function for the case where DDR_A and DDR_B are the same
3075 multivariate access function with a constant step. For an example
3076 see pr34635-1.c. */
3077
3078 static void
3079 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3080 {
3081 int x_1, x_2;
3082 tree c_1 = CHREC_LEFT (c_2);
3083 tree c_0 = CHREC_LEFT (c_1);
3084 lambda_vector dist_v;
3085 int v1, v2, cd;
3086
3087 /* Polynomials with more than 2 variables are not handled yet. When
3088 the evolution steps are parameters, it is not possible to
3089 represent the dependence using classical distance vectors. */
3090 if (TREE_CODE (c_0) != INTEGER_CST
3091 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3092 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3093 {
3094 DDR_AFFINE_P (ddr) = false;
3095 return;
3096 }
3097
3098 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3099 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3100
3101 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3102 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3103 v1 = int_cst_value (CHREC_RIGHT (c_1));
3104 v2 = int_cst_value (CHREC_RIGHT (c_2));
3105 cd = gcd (v1, v2);
3106 v1 /= cd;
3107 v2 /= cd;
3108
3109 if (v2 < 0)
3110 {
3111 v2 = -v2;
3112 v1 = -v1;
3113 }
3114
3115 dist_v[x_1] = v2;
3116 dist_v[x_2] = -v1;
3117 save_dist_v (ddr, dist_v);
3118
3119 add_outer_distances (ddr, dist_v, x_1);
3120 }
3121
3122 /* Helper function for the case where DDR_A and DDR_B are the same
3123 access functions. */
3124
3125 static void
3126 add_other_self_distances (struct data_dependence_relation *ddr)
3127 {
3128 lambda_vector dist_v;
3129 unsigned i;
3130 int index_carry = DDR_NB_LOOPS (ddr);
3131
3132 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3133 {
3134 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3135
3136 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3137 {
3138 if (!evolution_function_is_univariate_p (access_fun))
3139 {
3140 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3141 {
3142 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3143 return;
3144 }
3145
3146 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3147
3148 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3149 add_multivariate_self_dist (ddr, access_fun);
3150 else
3151 /* The evolution step is not constant: it varies in
3152 the outer loop, so this cannot be represented by a
3153 distance vector. For example in pr34635.c the
3154 evolution is {0, +, {0, +, 4}_1}_2. */
3155 DDR_AFFINE_P (ddr) = false;
3156
3157 return;
3158 }
3159
3160 index_carry = MIN (index_carry,
3161 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3162 DDR_LOOP_NEST (ddr)));
3163 }
3164 }
3165
3166 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3167 add_outer_distances (ddr, dist_v, index_carry);
3168 }
3169
3170 static void
3171 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3172 {
3173 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3174
3175 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3176 save_dist_v (ddr, dist_v);
3177 }
3178
3179 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3180 is the case for example when access functions are the same and
3181 equal to a constant, as in:
3182
3183 | loop_1
3184 | A[3] = ...
3185 | ... = A[3]
3186 | endloop_1
3187
3188 in which case the distance vectors are (0) and (1). */
3189
3190 static void
3191 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3192 {
3193 unsigned i, j;
3194
3195 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3196 {
3197 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3198 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3199 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3200
3201 for (j = 0; j < ca->n; j++)
3202 if (affine_function_zero_p (ca->fns[j]))
3203 {
3204 insert_innermost_unit_dist_vector (ddr);
3205 return;
3206 }
3207
3208 for (j = 0; j < cb->n; j++)
3209 if (affine_function_zero_p (cb->fns[j]))
3210 {
3211 insert_innermost_unit_dist_vector (ddr);
3212 return;
3213 }
3214 }
3215 }
3216
3217 /* Compute the classic per loop distance vector. DDR is the data
3218 dependence relation to build a vector from. Return false when fail
3219 to represent the data dependence as a distance vector. */
3220
3221 static bool
3222 build_classic_dist_vector (struct data_dependence_relation *ddr,
3223 struct loop *loop_nest)
3224 {
3225 bool init_b = false;
3226 int index_carry = DDR_NB_LOOPS (ddr);
3227 lambda_vector dist_v;
3228
3229 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3230 return false;
3231
3232 if (same_access_functions (ddr))
3233 {
3234 /* Save the 0 vector. */
3235 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3236 save_dist_v (ddr, dist_v);
3237
3238 if (constant_access_functions (ddr))
3239 add_distance_for_zero_overlaps (ddr);
3240
3241 if (DDR_NB_LOOPS (ddr) > 1)
3242 add_other_self_distances (ddr);
3243
3244 return true;
3245 }
3246
3247 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3248 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3249 dist_v, &init_b, &index_carry))
3250 return false;
3251
3252 /* Save the distance vector if we initialized one. */
3253 if (init_b)
3254 {
3255 /* Verify a basic constraint: classic distance vectors should
3256 always be lexicographically positive.
3257
3258 Data references are collected in the order of execution of
3259 the program, thus for the following loop
3260
3261 | for (i = 1; i < 100; i++)
3262 | for (j = 1; j < 100; j++)
3263 | {
3264 | t = T[j+1][i-1]; // A
3265 | T[j][i] = t + 2; // B
3266 | }
3267
3268 references are collected following the direction of the wind:
3269 A then B. The data dependence tests are performed also
3270 following this order, such that we're looking at the distance
3271 separating the elements accessed by A from the elements later
3272 accessed by B. But in this example, the distance returned by
3273 test_dep (A, B) is lexicographically negative (-1, 1), that
3274 means that the access A occurs later than B with respect to
3275 the outer loop, ie. we're actually looking upwind. In this
3276 case we solve test_dep (B, A) looking downwind to the
3277 lexicographically positive solution, that returns the
3278 distance vector (1, -1). */
3279 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3280 {
3281 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3282 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3283 loop_nest))
3284 return false;
3285 compute_subscript_distance (ddr);
3286 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3287 save_v, &init_b, &index_carry))
3288 return false;
3289 save_dist_v (ddr, save_v);
3290 DDR_REVERSED_P (ddr) = true;
3291
3292 /* In this case there is a dependence forward for all the
3293 outer loops:
3294
3295 | for (k = 1; k < 100; k++)
3296 | for (i = 1; i < 100; i++)
3297 | for (j = 1; j < 100; j++)
3298 | {
3299 | t = T[j+1][i-1]; // A
3300 | T[j][i] = t + 2; // B
3301 | }
3302
3303 the vectors are:
3304 (0, 1, -1)
3305 (1, 1, -1)
3306 (1, -1, 1)
3307 */
3308 if (DDR_NB_LOOPS (ddr) > 1)
3309 {
3310 add_outer_distances (ddr, save_v, index_carry);
3311 add_outer_distances (ddr, dist_v, index_carry);
3312 }
3313 }
3314 else
3315 {
3316 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3317 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3318
3319 if (DDR_NB_LOOPS (ddr) > 1)
3320 {
3321 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3322
3323 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3324 DDR_A (ddr), loop_nest))
3325 return false;
3326 compute_subscript_distance (ddr);
3327 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3328 opposite_v, &init_b,
3329 &index_carry))
3330 return false;
3331
3332 save_dist_v (ddr, save_v);
3333 add_outer_distances (ddr, dist_v, index_carry);
3334 add_outer_distances (ddr, opposite_v, index_carry);
3335 }
3336 else
3337 save_dist_v (ddr, save_v);
3338 }
3339 }
3340 else
3341 {
3342 /* There is a distance of 1 on all the outer loops: Example:
3343 there is a dependence of distance 1 on loop_1 for the array A.
3344
3345 | loop_1
3346 | A[5] = ...
3347 | endloop
3348 */
3349 add_outer_distances (ddr, dist_v,
3350 lambda_vector_first_nz (dist_v,
3351 DDR_NB_LOOPS (ddr), 0));
3352 }
3353
3354 if (dump_file && (dump_flags & TDF_DETAILS))
3355 {
3356 unsigned i;
3357
3358 fprintf (dump_file, "(build_classic_dist_vector\n");
3359 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3360 {
3361 fprintf (dump_file, " dist_vector = (");
3362 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3363 DDR_NB_LOOPS (ddr));
3364 fprintf (dump_file, " )\n");
3365 }
3366 fprintf (dump_file, ")\n");
3367 }
3368
3369 return true;
3370 }
3371
3372 /* Return the direction for a given distance.
3373 FIXME: Computing dir this way is suboptimal, since dir can catch
3374 cases that dist is unable to represent. */
3375
3376 static inline enum data_dependence_direction
3377 dir_from_dist (int dist)
3378 {
3379 if (dist > 0)
3380 return dir_positive;
3381 else if (dist < 0)
3382 return dir_negative;
3383 else
3384 return dir_equal;
3385 }
3386
3387 /* Compute the classic per loop direction vector. DDR is the data
3388 dependence relation to build a vector from. */
3389
3390 static void
3391 build_classic_dir_vector (struct data_dependence_relation *ddr)
3392 {
3393 unsigned i, j;
3394 lambda_vector dist_v;
3395
3396 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3397 {
3398 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3399
3400 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3401 dir_v[j] = dir_from_dist (dist_v[j]);
3402
3403 save_dir_v (ddr, dir_v);
3404 }
3405 }
3406
3407 /* Helper function. Returns true when there is a dependence between
3408 data references DRA and DRB. */
3409
3410 static bool
3411 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3412 struct data_reference *dra,
3413 struct data_reference *drb,
3414 struct loop *loop_nest)
3415 {
3416 unsigned int i;
3417 tree last_conflicts;
3418 struct subscript *subscript;
3419
3420 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3421 i++)
3422 {
3423 conflict_function *overlaps_a, *overlaps_b;
3424
3425 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3426 DR_ACCESS_FN (drb, i),
3427 &overlaps_a, &overlaps_b,
3428 &last_conflicts, loop_nest);
3429
3430 if (CF_NOT_KNOWN_P (overlaps_a)
3431 || CF_NOT_KNOWN_P (overlaps_b))
3432 {
3433 finalize_ddr_dependent (ddr, chrec_dont_know);
3434 dependence_stats.num_dependence_undetermined++;
3435 free_conflict_function (overlaps_a);
3436 free_conflict_function (overlaps_b);
3437 return false;
3438 }
3439
3440 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3441 || CF_NO_DEPENDENCE_P (overlaps_b))
3442 {
3443 finalize_ddr_dependent (ddr, chrec_known);
3444 dependence_stats.num_dependence_independent++;
3445 free_conflict_function (overlaps_a);
3446 free_conflict_function (overlaps_b);
3447 return false;
3448 }
3449
3450 else
3451 {
3452 if (SUB_CONFLICTS_IN_A (subscript))
3453 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3454 if (SUB_CONFLICTS_IN_B (subscript))
3455 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3456
3457 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3458 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3459 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3460 }
3461 }
3462
3463 return true;
3464 }
3465
3466 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3467
3468 static void
3469 subscript_dependence_tester (struct data_dependence_relation *ddr,
3470 struct loop *loop_nest)
3471 {
3472
3473 if (dump_file && (dump_flags & TDF_DETAILS))
3474 fprintf (dump_file, "(subscript_dependence_tester \n");
3475
3476 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3477 dependence_stats.num_dependence_dependent++;
3478
3479 compute_subscript_distance (ddr);
3480 if (build_classic_dist_vector (ddr, loop_nest))
3481 build_classic_dir_vector (ddr);
3482
3483 if (dump_file && (dump_flags & TDF_DETAILS))
3484 fprintf (dump_file, ")\n");
3485 }
3486
3487 /* Returns true when all the access functions of A are affine or
3488 constant with respect to LOOP_NEST. */
3489
3490 static bool
3491 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3492 const struct loop *loop_nest)
3493 {
3494 unsigned int i;
3495 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3496 tree t;
3497
3498 FOR_EACH_VEC_ELT (tree, fns, i, t)
3499 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3500 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3501 return false;
3502
3503 return true;
3504 }
3505
3506 /* Initializes an equation for an OMEGA problem using the information
3507 contained in the ACCESS_FUN. Returns true when the operation
3508 succeeded.
3509
3510 PB is the omega constraint system.
3511 EQ is the number of the equation to be initialized.
3512 OFFSET is used for shifting the variables names in the constraints:
3513 a constrain is composed of 2 * the number of variables surrounding
3514 dependence accesses. OFFSET is set either to 0 for the first n variables,
3515 then it is set to n.
3516 ACCESS_FUN is expected to be an affine chrec. */
3517
3518 static bool
3519 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3520 unsigned int offset, tree access_fun,
3521 struct data_dependence_relation *ddr)
3522 {
3523 switch (TREE_CODE (access_fun))
3524 {
3525 case POLYNOMIAL_CHREC:
3526 {
3527 tree left = CHREC_LEFT (access_fun);
3528 tree right = CHREC_RIGHT (access_fun);
3529 int var = CHREC_VARIABLE (access_fun);
3530 unsigned var_idx;
3531
3532 if (TREE_CODE (right) != INTEGER_CST)
3533 return false;
3534
3535 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3536 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3537
3538 /* Compute the innermost loop index. */
3539 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3540
3541 if (offset == 0)
3542 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3543 += int_cst_value (right);
3544
3545 switch (TREE_CODE (left))
3546 {
3547 case POLYNOMIAL_CHREC:
3548 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3549
3550 case INTEGER_CST:
3551 pb->eqs[eq].coef[0] += int_cst_value (left);
3552 return true;
3553
3554 default:
3555 return false;
3556 }
3557 }
3558
3559 case INTEGER_CST:
3560 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3561 return true;
3562
3563 default:
3564 return false;
3565 }
3566 }
3567
3568 /* As explained in the comments preceding init_omega_for_ddr, we have
3569 to set up a system for each loop level, setting outer loops
3570 variation to zero, and current loop variation to positive or zero.
3571 Save each lexico positive distance vector. */
3572
3573 static void
3574 omega_extract_distance_vectors (omega_pb pb,
3575 struct data_dependence_relation *ddr)
3576 {
3577 int eq, geq;
3578 unsigned i, j;
3579 struct loop *loopi, *loopj;
3580 enum omega_result res;
3581
3582 /* Set a new problem for each loop in the nest. The basis is the
3583 problem that we have initialized until now. On top of this we
3584 add new constraints. */
3585 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3586 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3587 {
3588 int dist = 0;
3589 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3590 DDR_NB_LOOPS (ddr));
3591
3592 omega_copy_problem (copy, pb);
3593
3594 /* For all the outer loops "loop_j", add "dj = 0". */
3595 for (j = 0;
3596 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3597 {
3598 eq = omega_add_zero_eq (copy, omega_black);
3599 copy->eqs[eq].coef[j + 1] = 1;
3600 }
3601
3602 /* For "loop_i", add "0 <= di". */
3603 geq = omega_add_zero_geq (copy, omega_black);
3604 copy->geqs[geq].coef[i + 1] = 1;
3605
3606 /* Reduce the constraint system, and test that the current
3607 problem is feasible. */
3608 res = omega_simplify_problem (copy);
3609 if (res == omega_false
3610 || res == omega_unknown
3611 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3612 goto next_problem;
3613
3614 for (eq = 0; eq < copy->num_subs; eq++)
3615 if (copy->subs[eq].key == (int) i + 1)
3616 {
3617 dist = copy->subs[eq].coef[0];
3618 goto found_dist;
3619 }
3620
3621 if (dist == 0)
3622 {
3623 /* Reinitialize problem... */
3624 omega_copy_problem (copy, pb);
3625 for (j = 0;
3626 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3627 {
3628 eq = omega_add_zero_eq (copy, omega_black);
3629 copy->eqs[eq].coef[j + 1] = 1;
3630 }
3631
3632 /* ..., but this time "di = 1". */
3633 eq = omega_add_zero_eq (copy, omega_black);
3634 copy->eqs[eq].coef[i + 1] = 1;
3635 copy->eqs[eq].coef[0] = -1;
3636
3637 res = omega_simplify_problem (copy);
3638 if (res == omega_false
3639 || res == omega_unknown
3640 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3641 goto next_problem;
3642
3643 for (eq = 0; eq < copy->num_subs; eq++)
3644 if (copy->subs[eq].key == (int) i + 1)
3645 {
3646 dist = copy->subs[eq].coef[0];
3647 goto found_dist;
3648 }
3649 }
3650
3651 found_dist:;
3652 /* Save the lexicographically positive distance vector. */
3653 if (dist >= 0)
3654 {
3655 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3656 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3657
3658 dist_v[i] = dist;
3659
3660 for (eq = 0; eq < copy->num_subs; eq++)
3661 if (copy->subs[eq].key > 0)
3662 {
3663 dist = copy->subs[eq].coef[0];
3664 dist_v[copy->subs[eq].key - 1] = dist;
3665 }
3666
3667 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3668 dir_v[j] = dir_from_dist (dist_v[j]);
3669
3670 save_dist_v (ddr, dist_v);
3671 save_dir_v (ddr, dir_v);
3672 }
3673
3674 next_problem:;
3675 omega_free_problem (copy);
3676 }
3677 }
3678
3679 /* This is called for each subscript of a tuple of data references:
3680 insert an equality for representing the conflicts. */
3681
3682 static bool
3683 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3684 struct data_dependence_relation *ddr,
3685 omega_pb pb, bool *maybe_dependent)
3686 {
3687 int eq;
3688 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3689 TREE_TYPE (access_fun_b));
3690 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3691 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3692 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3693 tree minus_one;
3694
3695 /* When the fun_a - fun_b is not constant, the dependence is not
3696 captured by the classic distance vector representation. */
3697 if (TREE_CODE (difference) != INTEGER_CST)
3698 return false;
3699
3700 /* ZIV test. */
3701 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3702 {
3703 /* There is no dependence. */
3704 *maybe_dependent = false;
3705 return true;
3706 }
3707
3708 minus_one = build_int_cst (type, -1);
3709 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3710
3711 eq = omega_add_zero_eq (pb, omega_black);
3712 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3713 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3714 /* There is probably a dependence, but the system of
3715 constraints cannot be built: answer "don't know". */
3716 return false;
3717
3718 /* GCD test. */
3719 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3720 && !int_divides_p (lambda_vector_gcd
3721 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3722 2 * DDR_NB_LOOPS (ddr)),
3723 pb->eqs[eq].coef[0]))
3724 {
3725 /* There is no dependence. */
3726 *maybe_dependent = false;
3727 return true;
3728 }
3729
3730 return true;
3731 }
3732
3733 /* Helper function, same as init_omega_for_ddr but specialized for
3734 data references A and B. */
3735
3736 static bool
3737 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3738 struct data_dependence_relation *ddr,
3739 omega_pb pb, bool *maybe_dependent)
3740 {
3741 unsigned i;
3742 int ineq;
3743 struct loop *loopi;
3744 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3745
3746 /* Insert an equality per subscript. */
3747 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3748 {
3749 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3750 ddr, pb, maybe_dependent))
3751 return false;
3752 else if (*maybe_dependent == false)
3753 {
3754 /* There is no dependence. */
3755 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3756 return true;
3757 }
3758 }
3759
3760 /* Insert inequalities: constraints corresponding to the iteration
3761 domain, i.e. the loops surrounding the references "loop_x" and
3762 the distance variables "dx". The layout of the OMEGA
3763 representation is as follows:
3764 - coef[0] is the constant
3765 - coef[1..nb_loops] are the protected variables that will not be
3766 removed by the solver: the "dx"
3767 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3768 */
3769 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3770 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3771 {
3772 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3773
3774 /* 0 <= loop_x */
3775 ineq = omega_add_zero_geq (pb, omega_black);
3776 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3777
3778 /* 0 <= loop_x + dx */
3779 ineq = omega_add_zero_geq (pb, omega_black);
3780 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3781 pb->geqs[ineq].coef[i + 1] = 1;
3782
3783 if (nbi != -1)
3784 {
3785 /* loop_x <= nb_iters */
3786 ineq = omega_add_zero_geq (pb, omega_black);
3787 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3788 pb->geqs[ineq].coef[0] = nbi;
3789
3790 /* loop_x + dx <= nb_iters */
3791 ineq = omega_add_zero_geq (pb, omega_black);
3792 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3793 pb->geqs[ineq].coef[i + 1] = -1;
3794 pb->geqs[ineq].coef[0] = nbi;
3795
3796 /* A step "dx" bigger than nb_iters is not feasible, so
3797 add "0 <= nb_iters + dx", */
3798 ineq = omega_add_zero_geq (pb, omega_black);
3799 pb->geqs[ineq].coef[i + 1] = 1;
3800 pb->geqs[ineq].coef[0] = nbi;
3801 /* and "dx <= nb_iters". */
3802 ineq = omega_add_zero_geq (pb, omega_black);
3803 pb->geqs[ineq].coef[i + 1] = -1;
3804 pb->geqs[ineq].coef[0] = nbi;
3805 }
3806 }
3807
3808 omega_extract_distance_vectors (pb, ddr);
3809
3810 return true;
3811 }
3812
3813 /* Sets up the Omega dependence problem for the data dependence
3814 relation DDR. Returns false when the constraint system cannot be
3815 built, ie. when the test answers "don't know". Returns true
3816 otherwise, and when independence has been proved (using one of the
3817 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3818 set MAYBE_DEPENDENT to true.
3819
3820 Example: for setting up the dependence system corresponding to the
3821 conflicting accesses
3822
3823 | loop_i
3824 | loop_j
3825 | A[i, i+1] = ...
3826 | ... A[2*j, 2*(i + j)]
3827 | endloop_j
3828 | endloop_i
3829
3830 the following constraints come from the iteration domain:
3831
3832 0 <= i <= Ni
3833 0 <= i + di <= Ni
3834 0 <= j <= Nj
3835 0 <= j + dj <= Nj
3836
3837 where di, dj are the distance variables. The constraints
3838 representing the conflicting elements are:
3839
3840 i = 2 * (j + dj)
3841 i + 1 = 2 * (i + di + j + dj)
3842
3843 For asking that the resulting distance vector (di, dj) be
3844 lexicographically positive, we insert the constraint "di >= 0". If
3845 "di = 0" in the solution, we fix that component to zero, and we
3846 look at the inner loops: we set a new problem where all the outer
3847 loop distances are zero, and fix this inner component to be
3848 positive. When one of the components is positive, we save that
3849 distance, and set a new problem where the distance on this loop is
3850 zero, searching for other distances in the inner loops. Here is
3851 the classic example that illustrates that we have to set for each
3852 inner loop a new problem:
3853
3854 | loop_1
3855 | loop_2
3856 | A[10]
3857 | endloop_2
3858 | endloop_1
3859
3860 we have to save two distances (1, 0) and (0, 1).
3861
3862 Given two array references, refA and refB, we have to set the
3863 dependence problem twice, refA vs. refB and refB vs. refA, and we
3864 cannot do a single test, as refB might occur before refA in the
3865 inner loops, and the contrary when considering outer loops: ex.
3866
3867 | loop_0
3868 | loop_1
3869 | loop_2
3870 | T[{1,+,1}_2][{1,+,1}_1] // refA
3871 | T[{2,+,1}_2][{0,+,1}_1] // refB
3872 | endloop_2
3873 | endloop_1
3874 | endloop_0
3875
3876 refB touches the elements in T before refA, and thus for the same
3877 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3878 but for successive loop_0 iterations, we have (1, -1, 1)
3879
3880 The Omega solver expects the distance variables ("di" in the
3881 previous example) to come first in the constraint system (as
3882 variables to be protected, or "safe" variables), the constraint
3883 system is built using the following layout:
3884
3885 "cst | distance vars | index vars".
3886 */
3887
3888 static bool
3889 init_omega_for_ddr (struct data_dependence_relation *ddr,
3890 bool *maybe_dependent)
3891 {
3892 omega_pb pb;
3893 bool res = false;
3894
3895 *maybe_dependent = true;
3896
3897 if (same_access_functions (ddr))
3898 {
3899 unsigned j;
3900 lambda_vector dir_v;
3901
3902 /* Save the 0 vector. */
3903 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3904 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3905 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3906 dir_v[j] = dir_equal;
3907 save_dir_v (ddr, dir_v);
3908
3909 /* Save the dependences carried by outer loops. */
3910 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3911 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3912 maybe_dependent);
3913 omega_free_problem (pb);
3914 return res;
3915 }
3916
3917 /* Omega expects the protected variables (those that have to be kept
3918 after elimination) to appear first in the constraint system.
3919 These variables are the distance variables. In the following
3920 initialization we declare NB_LOOPS safe variables, and the total
3921 number of variables for the constraint system is 2*NB_LOOPS. */
3922 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3923 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3924 maybe_dependent);
3925 omega_free_problem (pb);
3926
3927 /* Stop computation if not decidable, or no dependence. */
3928 if (res == false || *maybe_dependent == false)
3929 return res;
3930
3931 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3932 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3933 maybe_dependent);
3934 omega_free_problem (pb);
3935
3936 return res;
3937 }
3938
3939 /* Return true when DDR contains the same information as that stored
3940 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3941
3942 static bool
3943 ddr_consistent_p (FILE *file,
3944 struct data_dependence_relation *ddr,
3945 VEC (lambda_vector, heap) *dist_vects,
3946 VEC (lambda_vector, heap) *dir_vects)
3947 {
3948 unsigned int i, j;
3949
3950 /* If dump_file is set, output there. */
3951 if (dump_file && (dump_flags & TDF_DETAILS))
3952 file = dump_file;
3953
3954 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3955 {
3956 lambda_vector b_dist_v;
3957 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3958 VEC_length (lambda_vector, dist_vects),
3959 DDR_NUM_DIST_VECTS (ddr));
3960
3961 fprintf (file, "Banerjee dist vectors:\n");
3962 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3963 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3964
3965 fprintf (file, "Omega dist vectors:\n");
3966 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3967 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3968
3969 fprintf (file, "data dependence relation:\n");
3970 dump_data_dependence_relation (file, ddr);
3971
3972 fprintf (file, ")\n");
3973 return false;
3974 }
3975
3976 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3977 {
3978 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3979 VEC_length (lambda_vector, dir_vects),
3980 DDR_NUM_DIR_VECTS (ddr));
3981 return false;
3982 }
3983
3984 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3985 {
3986 lambda_vector a_dist_v;
3987 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3988
3989 /* Distance vectors are not ordered in the same way in the DDR
3990 and in the DIST_VECTS: search for a matching vector. */
3991 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3992 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3993 break;
3994
3995 if (j == VEC_length (lambda_vector, dist_vects))
3996 {
3997 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3998 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3999 fprintf (file, "not found in Omega dist vectors:\n");
4000 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4001 fprintf (file, "data dependence relation:\n");
4002 dump_data_dependence_relation (file, ddr);
4003 fprintf (file, ")\n");
4004 }
4005 }
4006
4007 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4008 {
4009 lambda_vector a_dir_v;
4010 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4011
4012 /* Direction vectors are not ordered in the same way in the DDR
4013 and in the DIR_VECTS: search for a matching vector. */
4014 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
4015 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4016 break;
4017
4018 if (j == VEC_length (lambda_vector, dist_vects))
4019 {
4020 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4021 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4022 fprintf (file, "not found in Omega dir vectors:\n");
4023 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4024 fprintf (file, "data dependence relation:\n");
4025 dump_data_dependence_relation (file, ddr);
4026 fprintf (file, ")\n");
4027 }
4028 }
4029
4030 return true;
4031 }
4032
4033 /* This computes the affine dependence relation between A and B with
4034 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4035 independence between two accesses, while CHREC_DONT_KNOW is used
4036 for representing the unknown relation.
4037
4038 Note that it is possible to stop the computation of the dependence
4039 relation the first time we detect a CHREC_KNOWN element for a given
4040 subscript. */
4041
4042 static void
4043 compute_affine_dependence (struct data_dependence_relation *ddr,
4044 struct loop *loop_nest)
4045 {
4046 struct data_reference *dra = DDR_A (ddr);
4047 struct data_reference *drb = DDR_B (ddr);
4048
4049 if (dump_file && (dump_flags & TDF_DETAILS))
4050 {
4051 fprintf (dump_file, "(compute_affine_dependence\n");
4052 fprintf (dump_file, " (stmt_a = \n");
4053 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
4054 fprintf (dump_file, ")\n (stmt_b = \n");
4055 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
4056 fprintf (dump_file, ")\n");
4057 }
4058
4059 /* Analyze only when the dependence relation is not yet known. */
4060 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4061 {
4062 dependence_stats.num_dependence_tests++;
4063
4064 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4065 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4066 {
4067 if (flag_check_data_deps)
4068 {
4069 /* Compute the dependences using the first algorithm. */
4070 subscript_dependence_tester (ddr, loop_nest);
4071
4072 if (dump_file && (dump_flags & TDF_DETAILS))
4073 {
4074 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4075 dump_data_dependence_relation (dump_file, ddr);
4076 }
4077
4078 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4079 {
4080 bool maybe_dependent;
4081 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4082
4083 /* Save the result of the first DD analyzer. */
4084 dist_vects = DDR_DIST_VECTS (ddr);
4085 dir_vects = DDR_DIR_VECTS (ddr);
4086
4087 /* Reset the information. */
4088 DDR_DIST_VECTS (ddr) = NULL;
4089 DDR_DIR_VECTS (ddr) = NULL;
4090
4091 /* Compute the same information using Omega. */
4092 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4093 goto csys_dont_know;
4094
4095 if (dump_file && (dump_flags & TDF_DETAILS))
4096 {
4097 fprintf (dump_file, "Omega Analyzer\n");
4098 dump_data_dependence_relation (dump_file, ddr);
4099 }
4100
4101 /* Check that we get the same information. */
4102 if (maybe_dependent)
4103 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4104 dir_vects));
4105 }
4106 }
4107 else
4108 subscript_dependence_tester (ddr, loop_nest);
4109 }
4110
4111 /* As a last case, if the dependence cannot be determined, or if
4112 the dependence is considered too difficult to determine, answer
4113 "don't know". */
4114 else
4115 {
4116 csys_dont_know:;
4117 dependence_stats.num_dependence_undetermined++;
4118
4119 if (dump_file && (dump_flags & TDF_DETAILS))
4120 {
4121 fprintf (dump_file, "Data ref a:\n");
4122 dump_data_reference (dump_file, dra);
4123 fprintf (dump_file, "Data ref b:\n");
4124 dump_data_reference (dump_file, drb);
4125 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4126 }
4127 finalize_ddr_dependent (ddr, chrec_dont_know);
4128 }
4129 }
4130
4131 if (dump_file && (dump_flags & TDF_DETAILS))
4132 fprintf (dump_file, ")\n");
4133 }
4134
4135 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4136 the data references in DATAREFS, in the LOOP_NEST. When
4137 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4138 relations. Return true when successful, i.e. data references number
4139 is small enough to be handled. */
4140
4141 bool
4142 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4143 VEC (ddr_p, heap) **dependence_relations,
4144 VEC (loop_p, heap) *loop_nest,
4145 bool compute_self_and_rr)
4146 {
4147 struct data_dependence_relation *ddr;
4148 struct data_reference *a, *b;
4149 unsigned int i, j;
4150
4151 if ((int) VEC_length (data_reference_p, datarefs)
4152 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4153 {
4154 struct data_dependence_relation *ddr;
4155
4156 /* Insert a single relation into dependence_relations:
4157 chrec_dont_know. */
4158 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4159 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4160 return false;
4161 }
4162
4163 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4164 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4165 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4166 {
4167 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4168 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4169 if (loop_nest)
4170 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4171 }
4172
4173 if (compute_self_and_rr)
4174 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4175 {
4176 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4177 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4178 if (loop_nest)
4179 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4180 }
4181
4182 return true;
4183 }
4184
4185 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4186 true if STMT clobbers memory, false otherwise. */
4187
4188 bool
4189 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4190 {
4191 bool clobbers_memory = false;
4192 data_ref_loc *ref;
4193 tree *op0, *op1;
4194 enum gimple_code stmt_code = gimple_code (stmt);
4195
4196 *references = NULL;
4197
4198 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4199 Calls have side-effects, except those to const or pure
4200 functions. */
4201 if ((stmt_code == GIMPLE_CALL
4202 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4203 || (stmt_code == GIMPLE_ASM
4204 && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt))))
4205 clobbers_memory = true;
4206
4207 if (!gimple_vuse (stmt))
4208 return clobbers_memory;
4209
4210 if (stmt_code == GIMPLE_ASSIGN)
4211 {
4212 tree base;
4213 op0 = gimple_assign_lhs_ptr (stmt);
4214 op1 = gimple_assign_rhs1_ptr (stmt);
4215
4216 if (DECL_P (*op1)
4217 || (REFERENCE_CLASS_P (*op1)
4218 && (base = get_base_address (*op1))
4219 && TREE_CODE (base) != SSA_NAME))
4220 {
4221 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4222 ref->pos = op1;
4223 ref->is_read = true;
4224 }
4225 }
4226 else if (stmt_code == GIMPLE_CALL)
4227 {
4228 unsigned i, n;
4229
4230 op0 = gimple_call_lhs_ptr (stmt);
4231 n = gimple_call_num_args (stmt);
4232 for (i = 0; i < n; i++)
4233 {
4234 op1 = gimple_call_arg_ptr (stmt, i);
4235
4236 if (DECL_P (*op1)
4237 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4238 {
4239 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4240 ref->pos = op1;
4241 ref->is_read = true;
4242 }
4243 }
4244 }
4245 else
4246 return clobbers_memory;
4247
4248 if (*op0
4249 && (DECL_P (*op0)
4250 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4251 {
4252 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4253 ref->pos = op0;
4254 ref->is_read = false;
4255 }
4256 return clobbers_memory;
4257 }
4258
4259 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4260 reference, returns false, otherwise returns true. NEST is the outermost
4261 loop of the loop nest in which the references should be analyzed. */
4262
4263 bool
4264 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4265 VEC (data_reference_p, heap) **datarefs)
4266 {
4267 unsigned i;
4268 VEC (data_ref_loc, heap) *references;
4269 data_ref_loc *ref;
4270 bool ret = true;
4271 data_reference_p dr;
4272
4273 if (get_references_in_stmt (stmt, &references))
4274 {
4275 VEC_free (data_ref_loc, heap, references);
4276 return false;
4277 }
4278
4279 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4280 {
4281 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4282 *ref->pos, stmt, ref->is_read);
4283 gcc_assert (dr != NULL);
4284 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4285 }
4286 VEC_free (data_ref_loc, heap, references);
4287 return ret;
4288 }
4289
4290 /* Stores the data references in STMT to DATAREFS. If there is an
4291 unanalyzable reference, returns false, otherwise returns true.
4292 NEST is the outermost loop of the loop nest in which the references
4293 should be instantiated, LOOP is the loop in which the references
4294 should be analyzed. */
4295
4296 bool
4297 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4298 VEC (data_reference_p, heap) **datarefs)
4299 {
4300 unsigned i;
4301 VEC (data_ref_loc, heap) *references;
4302 data_ref_loc *ref;
4303 bool ret = true;
4304 data_reference_p dr;
4305
4306 if (get_references_in_stmt (stmt, &references))
4307 {
4308 VEC_free (data_ref_loc, heap, references);
4309 return false;
4310 }
4311
4312 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4313 {
4314 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4315 gcc_assert (dr != NULL);
4316 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4317 }
4318
4319 VEC_free (data_ref_loc, heap, references);
4320 return ret;
4321 }
4322
4323 /* Search the data references in LOOP, and record the information into
4324 DATAREFS. Returns chrec_dont_know when failing to analyze a
4325 difficult case, returns NULL_TREE otherwise. */
4326
4327 tree
4328 find_data_references_in_bb (struct loop *loop, basic_block bb,
4329 VEC (data_reference_p, heap) **datarefs)
4330 {
4331 gimple_stmt_iterator bsi;
4332
4333 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4334 {
4335 gimple stmt = gsi_stmt (bsi);
4336
4337 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4338 {
4339 struct data_reference *res;
4340 res = XCNEW (struct data_reference);
4341 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4342
4343 return chrec_dont_know;
4344 }
4345 }
4346
4347 return NULL_TREE;
4348 }
4349
4350 /* Search the data references in LOOP, and record the information into
4351 DATAREFS. Returns chrec_dont_know when failing to analyze a
4352 difficult case, returns NULL_TREE otherwise.
4353
4354 TODO: This function should be made smarter so that it can handle address
4355 arithmetic as if they were array accesses, etc. */
4356
4357 static tree
4358 find_data_references_in_loop (struct loop *loop,
4359 VEC (data_reference_p, heap) **datarefs)
4360 {
4361 basic_block bb, *bbs;
4362 unsigned int i;
4363
4364 bbs = get_loop_body_in_dom_order (loop);
4365
4366 for (i = 0; i < loop->num_nodes; i++)
4367 {
4368 bb = bbs[i];
4369
4370 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4371 {
4372 free (bbs);
4373 return chrec_dont_know;
4374 }
4375 }
4376 free (bbs);
4377
4378 return NULL_TREE;
4379 }
4380
4381 /* Recursive helper function. */
4382
4383 static bool
4384 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4385 {
4386 /* Inner loops of the nest should not contain siblings. Example:
4387 when there are two consecutive loops,
4388
4389 | loop_0
4390 | loop_1
4391 | A[{0, +, 1}_1]
4392 | endloop_1
4393 | loop_2
4394 | A[{0, +, 1}_2]
4395 | endloop_2
4396 | endloop_0
4397
4398 the dependence relation cannot be captured by the distance
4399 abstraction. */
4400 if (loop->next)
4401 return false;
4402
4403 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4404 if (loop->inner)
4405 return find_loop_nest_1 (loop->inner, loop_nest);
4406 return true;
4407 }
4408
4409 /* Return false when the LOOP is not well nested. Otherwise return
4410 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4411 contain the loops from the outermost to the innermost, as they will
4412 appear in the classic distance vector. */
4413
4414 bool
4415 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4416 {
4417 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4418 if (loop->inner)
4419 return find_loop_nest_1 (loop->inner, loop_nest);
4420 return true;
4421 }
4422
4423 /* Returns true when the data dependences have been computed, false otherwise.
4424 Given a loop nest LOOP, the following vectors are returned:
4425 DATAREFS is initialized to all the array elements contained in this loop,
4426 DEPENDENCE_RELATIONS contains the relations between the data references.
4427 Compute read-read and self relations if
4428 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4429
4430 bool
4431 compute_data_dependences_for_loop (struct loop *loop,
4432 bool compute_self_and_read_read_dependences,
4433 VEC (loop_p, heap) **loop_nest,
4434 VEC (data_reference_p, heap) **datarefs,
4435 VEC (ddr_p, heap) **dependence_relations)
4436 {
4437 bool res = true;
4438
4439 memset (&dependence_stats, 0, sizeof (dependence_stats));
4440
4441 /* If the loop nest is not well formed, or one of the data references
4442 is not computable, give up without spending time to compute other
4443 dependences. */
4444 if (!loop
4445 || !find_loop_nest (loop, loop_nest)
4446 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4447 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4448 compute_self_and_read_read_dependences))
4449 res = false;
4450
4451 if (dump_file && (dump_flags & TDF_STATS))
4452 {
4453 fprintf (dump_file, "Dependence tester statistics:\n");
4454
4455 fprintf (dump_file, "Number of dependence tests: %d\n",
4456 dependence_stats.num_dependence_tests);
4457 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4458 dependence_stats.num_dependence_dependent);
4459 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4460 dependence_stats.num_dependence_independent);
4461 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4462 dependence_stats.num_dependence_undetermined);
4463
4464 fprintf (dump_file, "Number of subscript tests: %d\n",
4465 dependence_stats.num_subscript_tests);
4466 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4467 dependence_stats.num_subscript_undetermined);
4468 fprintf (dump_file, "Number of same subscript function: %d\n",
4469 dependence_stats.num_same_subscript_function);
4470
4471 fprintf (dump_file, "Number of ziv tests: %d\n",
4472 dependence_stats.num_ziv);
4473 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4474 dependence_stats.num_ziv_dependent);
4475 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4476 dependence_stats.num_ziv_independent);
4477 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4478 dependence_stats.num_ziv_unimplemented);
4479
4480 fprintf (dump_file, "Number of siv tests: %d\n",
4481 dependence_stats.num_siv);
4482 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4483 dependence_stats.num_siv_dependent);
4484 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4485 dependence_stats.num_siv_independent);
4486 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4487 dependence_stats.num_siv_unimplemented);
4488
4489 fprintf (dump_file, "Number of miv tests: %d\n",
4490 dependence_stats.num_miv);
4491 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4492 dependence_stats.num_miv_dependent);
4493 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4494 dependence_stats.num_miv_independent);
4495 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4496 dependence_stats.num_miv_unimplemented);
4497 }
4498
4499 return res;
4500 }
4501
4502 /* Returns true when the data dependences for the basic block BB have been
4503 computed, false otherwise.
4504 DATAREFS is initialized to all the array elements contained in this basic
4505 block, DEPENDENCE_RELATIONS contains the relations between the data
4506 references. Compute read-read and self relations if
4507 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4508 bool
4509 compute_data_dependences_for_bb (basic_block bb,
4510 bool compute_self_and_read_read_dependences,
4511 VEC (data_reference_p, heap) **datarefs,
4512 VEC (ddr_p, heap) **dependence_relations)
4513 {
4514 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4515 return false;
4516
4517 return compute_all_dependences (*datarefs, dependence_relations, NULL,
4518 compute_self_and_read_read_dependences);
4519 }
4520
4521 /* Entry point (for testing only). Analyze all the data references
4522 and the dependence relations in LOOP.
4523
4524 The data references are computed first.
4525
4526 A relation on these nodes is represented by a complete graph. Some
4527 of the relations could be of no interest, thus the relations can be
4528 computed on demand.
4529
4530 In the following function we compute all the relations. This is
4531 just a first implementation that is here for:
4532 - for showing how to ask for the dependence relations,
4533 - for the debugging the whole dependence graph,
4534 - for the dejagnu testcases and maintenance.
4535
4536 It is possible to ask only for a part of the graph, avoiding to
4537 compute the whole dependence graph. The computed dependences are
4538 stored in a knowledge base (KB) such that later queries don't
4539 recompute the same information. The implementation of this KB is
4540 transparent to the optimizer, and thus the KB can be changed with a
4541 more efficient implementation, or the KB could be disabled. */
4542 static void
4543 analyze_all_data_dependences (struct loop *loop)
4544 {
4545 unsigned int i;
4546 int nb_data_refs = 10;
4547 VEC (data_reference_p, heap) *datarefs =
4548 VEC_alloc (data_reference_p, heap, nb_data_refs);
4549 VEC (ddr_p, heap) *dependence_relations =
4550 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4551 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4552
4553 /* Compute DDs on the whole function. */
4554 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4555 &dependence_relations);
4556
4557 if (dump_file)
4558 {
4559 dump_data_dependence_relations (dump_file, dependence_relations);
4560 fprintf (dump_file, "\n\n");
4561
4562 if (dump_flags & TDF_DETAILS)
4563 dump_dist_dir_vectors (dump_file, dependence_relations);
4564
4565 if (dump_flags & TDF_STATS)
4566 {
4567 unsigned nb_top_relations = 0;
4568 unsigned nb_bot_relations = 0;
4569 unsigned nb_chrec_relations = 0;
4570 struct data_dependence_relation *ddr;
4571
4572 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4573 {
4574 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4575 nb_top_relations++;
4576
4577 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4578 nb_bot_relations++;
4579
4580 else
4581 nb_chrec_relations++;
4582 }
4583
4584 gather_stats_on_scev_database ();
4585 }
4586 }
4587
4588 VEC_free (loop_p, heap, loop_nest);
4589 free_dependence_relations (dependence_relations);
4590 free_data_refs (datarefs);
4591 }
4592
4593 /* Computes all the data dependences and check that the results of
4594 several analyzers are the same. */
4595
4596 void
4597 tree_check_data_deps (void)
4598 {
4599 loop_iterator li;
4600 struct loop *loop_nest;
4601
4602 FOR_EACH_LOOP (li, loop_nest, 0)
4603 analyze_all_data_dependences (loop_nest);
4604 }
4605
4606 /* Free the memory used by a data dependence relation DDR. */
4607
4608 void
4609 free_dependence_relation (struct data_dependence_relation *ddr)
4610 {
4611 if (ddr == NULL)
4612 return;
4613
4614 if (DDR_SUBSCRIPTS (ddr))
4615 free_subscripts (DDR_SUBSCRIPTS (ddr));
4616 if (DDR_DIST_VECTS (ddr))
4617 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4618 if (DDR_DIR_VECTS (ddr))
4619 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4620
4621 free (ddr);
4622 }
4623
4624 /* Free the memory used by the data dependence relations from
4625 DEPENDENCE_RELATIONS. */
4626
4627 void
4628 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4629 {
4630 unsigned int i;
4631 struct data_dependence_relation *ddr;
4632
4633 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4634 if (ddr)
4635 free_dependence_relation (ddr);
4636
4637 VEC_free (ddr_p, heap, dependence_relations);
4638 }
4639
4640 /* Free the memory used by the data references from DATAREFS. */
4641
4642 void
4643 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4644 {
4645 unsigned int i;
4646 struct data_reference *dr;
4647
4648 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4649 free_data_ref (dr);
4650 VEC_free (data_reference_p, heap, datarefs);
4651 }
4652
4653 \f
4654
4655 /* Dump vertex I in RDG to FILE. */
4656
4657 void
4658 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4659 {
4660 struct vertex *v = &(rdg->vertices[i]);
4661 struct graph_edge *e;
4662
4663 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4664 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4665 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4666
4667 if (v->pred)
4668 for (e = v->pred; e; e = e->pred_next)
4669 fprintf (file, " %d", e->src);
4670
4671 fprintf (file, ") (out:");
4672
4673 if (v->succ)
4674 for (e = v->succ; e; e = e->succ_next)
4675 fprintf (file, " %d", e->dest);
4676
4677 fprintf (file, ")\n");
4678 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4679 fprintf (file, ")\n");
4680 }
4681
4682 /* Call dump_rdg_vertex on stderr. */
4683
4684 DEBUG_FUNCTION void
4685 debug_rdg_vertex (struct graph *rdg, int i)
4686 {
4687 dump_rdg_vertex (stderr, rdg, i);
4688 }
4689
4690 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4691 dumped vertices to that bitmap. */
4692
4693 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4694 {
4695 int i;
4696
4697 fprintf (file, "(%d\n", c);
4698
4699 for (i = 0; i < rdg->n_vertices; i++)
4700 if (rdg->vertices[i].component == c)
4701 {
4702 if (dumped)
4703 bitmap_set_bit (dumped, i);
4704
4705 dump_rdg_vertex (file, rdg, i);
4706 }
4707
4708 fprintf (file, ")\n");
4709 }
4710
4711 /* Call dump_rdg_vertex on stderr. */
4712
4713 DEBUG_FUNCTION void
4714 debug_rdg_component (struct graph *rdg, int c)
4715 {
4716 dump_rdg_component (stderr, rdg, c, NULL);
4717 }
4718
4719 /* Dump the reduced dependence graph RDG to FILE. */
4720
4721 void
4722 dump_rdg (FILE *file, struct graph *rdg)
4723 {
4724 int i;
4725 bitmap dumped = BITMAP_ALLOC (NULL);
4726
4727 fprintf (file, "(rdg\n");
4728
4729 for (i = 0; i < rdg->n_vertices; i++)
4730 if (!bitmap_bit_p (dumped, i))
4731 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4732
4733 fprintf (file, ")\n");
4734 BITMAP_FREE (dumped);
4735 }
4736
4737 /* Call dump_rdg on stderr. */
4738
4739 DEBUG_FUNCTION void
4740 debug_rdg (struct graph *rdg)
4741 {
4742 dump_rdg (stderr, rdg);
4743 }
4744
4745 static void
4746 dot_rdg_1 (FILE *file, struct graph *rdg)
4747 {
4748 int i;
4749
4750 fprintf (file, "digraph RDG {\n");
4751
4752 for (i = 0; i < rdg->n_vertices; i++)
4753 {
4754 struct vertex *v = &(rdg->vertices[i]);
4755 struct graph_edge *e;
4756
4757 /* Highlight reads from memory. */
4758 if (RDG_MEM_READS_STMT (rdg, i))
4759 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4760
4761 /* Highlight stores to memory. */
4762 if (RDG_MEM_WRITE_STMT (rdg, i))
4763 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4764
4765 if (v->succ)
4766 for (e = v->succ; e; e = e->succ_next)
4767 switch (RDGE_TYPE (e))
4768 {
4769 case input_dd:
4770 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4771 break;
4772
4773 case output_dd:
4774 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4775 break;
4776
4777 case flow_dd:
4778 /* These are the most common dependences: don't print these. */
4779 fprintf (file, "%d -> %d \n", i, e->dest);
4780 break;
4781
4782 case anti_dd:
4783 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4784 break;
4785
4786 default:
4787 gcc_unreachable ();
4788 }
4789 }
4790
4791 fprintf (file, "}\n\n");
4792 }
4793
4794 /* Display the Reduced Dependence Graph using dotty. */
4795 extern void dot_rdg (struct graph *);
4796
4797 DEBUG_FUNCTION void
4798 dot_rdg (struct graph *rdg)
4799 {
4800 /* When debugging, enable the following code. This cannot be used
4801 in production compilers because it calls "system". */
4802 #if 0
4803 FILE *file = fopen ("/tmp/rdg.dot", "w");
4804 gcc_assert (file != NULL);
4805
4806 dot_rdg_1 (file, rdg);
4807 fclose (file);
4808
4809 system ("dotty /tmp/rdg.dot &");
4810 #else
4811 dot_rdg_1 (stderr, rdg);
4812 #endif
4813 }
4814
4815 /* This structure is used for recording the mapping statement index in
4816 the RDG. */
4817
4818 struct GTY(()) rdg_vertex_info
4819 {
4820 gimple stmt;
4821 int index;
4822 };
4823
4824 /* Returns the index of STMT in RDG. */
4825
4826 int
4827 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4828 {
4829 struct rdg_vertex_info rvi, *slot;
4830
4831 rvi.stmt = stmt;
4832 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4833
4834 if (!slot)
4835 return -1;
4836
4837 return slot->index;
4838 }
4839
4840 /* Creates an edge in RDG for each distance vector from DDR. The
4841 order that we keep track of in the RDG is the order in which
4842 statements have to be executed. */
4843
4844 static void
4845 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4846 {
4847 struct graph_edge *e;
4848 int va, vb;
4849 data_reference_p dra = DDR_A (ddr);
4850 data_reference_p drb = DDR_B (ddr);
4851 unsigned level = ddr_dependence_level (ddr);
4852
4853 /* For non scalar dependences, when the dependence is REVERSED,
4854 statement B has to be executed before statement A. */
4855 if (level > 0
4856 && !DDR_REVERSED_P (ddr))
4857 {
4858 data_reference_p tmp = dra;
4859 dra = drb;
4860 drb = tmp;
4861 }
4862
4863 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4864 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4865
4866 if (va < 0 || vb < 0)
4867 return;
4868
4869 e = add_edge (rdg, va, vb);
4870 e->data = XNEW (struct rdg_edge);
4871
4872 RDGE_LEVEL (e) = level;
4873 RDGE_RELATION (e) = ddr;
4874
4875 /* Determines the type of the data dependence. */
4876 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4877 RDGE_TYPE (e) = input_dd;
4878 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4879 RDGE_TYPE (e) = output_dd;
4880 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4881 RDGE_TYPE (e) = flow_dd;
4882 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4883 RDGE_TYPE (e) = anti_dd;
4884 }
4885
4886 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4887 the index of DEF in RDG. */
4888
4889 static void
4890 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4891 {
4892 use_operand_p imm_use_p;
4893 imm_use_iterator iterator;
4894
4895 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4896 {
4897 struct graph_edge *e;
4898 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4899
4900 if (use < 0)
4901 continue;
4902
4903 e = add_edge (rdg, idef, use);
4904 e->data = XNEW (struct rdg_edge);
4905 RDGE_TYPE (e) = flow_dd;
4906 RDGE_RELATION (e) = NULL;
4907 }
4908 }
4909
4910 /* Creates the edges of the reduced dependence graph RDG. */
4911
4912 static void
4913 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4914 {
4915 int i;
4916 struct data_dependence_relation *ddr;
4917 def_operand_p def_p;
4918 ssa_op_iter iter;
4919
4920 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4921 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4922 create_rdg_edge_for_ddr (rdg, ddr);
4923
4924 for (i = 0; i < rdg->n_vertices; i++)
4925 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4926 iter, SSA_OP_DEF)
4927 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4928 }
4929
4930 /* Build the vertices of the reduced dependence graph RDG. */
4931
4932 void
4933 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4934 {
4935 int i, j;
4936 gimple stmt;
4937
4938 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4939 {
4940 VEC (data_ref_loc, heap) *references;
4941 data_ref_loc *ref;
4942 struct vertex *v = &(rdg->vertices[i]);
4943 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4944 struct rdg_vertex_info **slot;
4945
4946 rvi->stmt = stmt;
4947 rvi->index = i;
4948 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4949
4950 if (!*slot)
4951 *slot = rvi;
4952 else
4953 free (rvi);
4954
4955 v->data = XNEW (struct rdg_vertex);
4956 RDG_STMT (rdg, i) = stmt;
4957
4958 RDG_MEM_WRITE_STMT (rdg, i) = false;
4959 RDG_MEM_READS_STMT (rdg, i) = false;
4960 if (gimple_code (stmt) == GIMPLE_PHI)
4961 continue;
4962
4963 get_references_in_stmt (stmt, &references);
4964 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4965 if (!ref->is_read)
4966 RDG_MEM_WRITE_STMT (rdg, i) = true;
4967 else
4968 RDG_MEM_READS_STMT (rdg, i) = true;
4969
4970 VEC_free (data_ref_loc, heap, references);
4971 }
4972 }
4973
4974 /* Initialize STMTS with all the statements of LOOP. When
4975 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4976 which we discover statements is important as
4977 generate_loops_for_partition is using the same traversal for
4978 identifying statements. */
4979
4980 static void
4981 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4982 {
4983 unsigned int i;
4984 basic_block *bbs = get_loop_body_in_dom_order (loop);
4985
4986 for (i = 0; i < loop->num_nodes; i++)
4987 {
4988 basic_block bb = bbs[i];
4989 gimple_stmt_iterator bsi;
4990 gimple stmt;
4991
4992 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4993 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4994
4995 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4996 {
4997 stmt = gsi_stmt (bsi);
4998 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
4999 VEC_safe_push (gimple, heap, *stmts, stmt);
5000 }
5001 }
5002
5003 free (bbs);
5004 }
5005
5006 /* Returns true when all the dependences are computable. */
5007
5008 static bool
5009 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5010 {
5011 ddr_p ddr;
5012 unsigned int i;
5013
5014 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5015 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5016 return false;
5017
5018 return true;
5019 }
5020
5021 /* Computes a hash function for element ELT. */
5022
5023 static hashval_t
5024 hash_stmt_vertex_info (const void *elt)
5025 {
5026 const struct rdg_vertex_info *const rvi =
5027 (const struct rdg_vertex_info *) elt;
5028 gimple stmt = rvi->stmt;
5029
5030 return htab_hash_pointer (stmt);
5031 }
5032
5033 /* Compares database elements E1 and E2. */
5034
5035 static int
5036 eq_stmt_vertex_info (const void *e1, const void *e2)
5037 {
5038 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5039 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5040
5041 return elt1->stmt == elt2->stmt;
5042 }
5043
5044 /* Free the element E. */
5045
5046 static void
5047 hash_stmt_vertex_del (void *e)
5048 {
5049 free (e);
5050 }
5051
5052 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5053 statement of the loop nest, and one edge per data dependence or
5054 scalar dependence. */
5055
5056 struct graph *
5057 build_empty_rdg (int n_stmts)
5058 {
5059 int nb_data_refs = 10;
5060 struct graph *rdg = new_graph (n_stmts);
5061
5062 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5063 eq_stmt_vertex_info, hash_stmt_vertex_del);
5064 return rdg;
5065 }
5066
5067 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5068 statement of the loop nest, and one edge per data dependence or
5069 scalar dependence. */
5070
5071 struct graph *
5072 build_rdg (struct loop *loop,
5073 VEC (loop_p, heap) **loop_nest,
5074 VEC (ddr_p, heap) **dependence_relations,
5075 VEC (data_reference_p, heap) **datarefs)
5076 {
5077 struct graph *rdg = NULL;
5078 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5079
5080 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5081 dependence_relations);
5082
5083 if (known_dependences_p (*dependence_relations))
5084 {
5085 stmts_from_loop (loop, &stmts);
5086 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5087 create_rdg_vertices (rdg, stmts);
5088 create_rdg_edges (rdg, *dependence_relations);
5089 }
5090
5091 VEC_free (gimple, heap, stmts);
5092 return rdg;
5093 }
5094
5095 /* Free the reduced dependence graph RDG. */
5096
5097 void
5098 free_rdg (struct graph *rdg)
5099 {
5100 int i;
5101
5102 for (i = 0; i < rdg->n_vertices; i++)
5103 {
5104 struct vertex *v = &(rdg->vertices[i]);
5105 struct graph_edge *e;
5106
5107 for (e = v->succ; e; e = e->succ_next)
5108 free (e->data);
5109
5110 free (v->data);
5111 }
5112
5113 htab_delete (rdg->indices);
5114 free_graph (rdg);
5115 }
5116
5117 /* Initialize STMTS with all the statements of LOOP that contain a
5118 store to memory. */
5119
5120 void
5121 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5122 {
5123 unsigned int i;
5124 basic_block *bbs = get_loop_body_in_dom_order (loop);
5125
5126 for (i = 0; i < loop->num_nodes; i++)
5127 {
5128 basic_block bb = bbs[i];
5129 gimple_stmt_iterator bsi;
5130
5131 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5132 if (gimple_vdef (gsi_stmt (bsi)))
5133 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5134 }
5135
5136 free (bbs);
5137 }
5138
5139 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5140 that contains a data reference on its LHS with a stride of the same
5141 size as its unit type. */
5142
5143 bool
5144 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5145 {
5146 tree op0, op1;
5147 bool res;
5148 struct data_reference *dr;
5149
5150 if (!stmt
5151 || !gimple_vdef (stmt)
5152 || !is_gimple_assign (stmt)
5153 || !gimple_assign_single_p (stmt)
5154 || !(op1 = gimple_assign_rhs1 (stmt))
5155 || !(integer_zerop (op1) || real_zerop (op1)))
5156 return false;
5157
5158 dr = XCNEW (struct data_reference);
5159 op0 = gimple_assign_lhs (stmt);
5160
5161 DR_STMT (dr) = stmt;
5162 DR_REF (dr) = op0;
5163
5164 res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
5165 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5166
5167 free_data_ref (dr);
5168 return res;
5169 }
5170
5171 /* Initialize STMTS with all the statements of LOOP that contain a
5172 store to memory of the form "A[i] = 0". */
5173
5174 void
5175 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5176 {
5177 unsigned int i;
5178 basic_block bb;
5179 gimple_stmt_iterator si;
5180 gimple stmt;
5181 basic_block *bbs = get_loop_body_in_dom_order (loop);
5182
5183 for (i = 0; i < loop->num_nodes; i++)
5184 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5185 if ((stmt = gsi_stmt (si))
5186 && stmt_with_adjacent_zero_store_dr_p (stmt))
5187 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5188
5189 free (bbs);
5190 }
5191
5192 /* For a data reference REF, return the declaration of its base
5193 address or NULL_TREE if the base is not determined. */
5194
5195 static inline tree
5196 ref_base_address (gimple stmt, data_ref_loc *ref)
5197 {
5198 tree base = NULL_TREE;
5199 tree base_address;
5200 struct data_reference *dr = XCNEW (struct data_reference);
5201
5202 DR_STMT (dr) = stmt;
5203 DR_REF (dr) = *ref->pos;
5204 dr_analyze_innermost (dr, loop_containing_stmt (stmt));
5205 base_address = DR_BASE_ADDRESS (dr);
5206
5207 if (!base_address)
5208 goto end;
5209
5210 switch (TREE_CODE (base_address))
5211 {
5212 case ADDR_EXPR:
5213 base = TREE_OPERAND (base_address, 0);
5214 break;
5215
5216 default:
5217 base = base_address;
5218 break;
5219 }
5220
5221 end:
5222 free_data_ref (dr);
5223 return base;
5224 }
5225
5226 /* Determines whether the statement from vertex V of the RDG has a
5227 definition used outside the loop that contains this statement. */
5228
5229 bool
5230 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5231 {
5232 gimple stmt = RDG_STMT (rdg, v);
5233 struct loop *loop = loop_containing_stmt (stmt);
5234 use_operand_p imm_use_p;
5235 imm_use_iterator iterator;
5236 ssa_op_iter it;
5237 def_operand_p def_p;
5238
5239 if (!loop)
5240 return true;
5241
5242 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5243 {
5244 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5245 {
5246 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5247 return true;
5248 }
5249 }
5250
5251 return false;
5252 }
5253
5254 /* Determines whether statements S1 and S2 access to similar memory
5255 locations. Two memory accesses are considered similar when they
5256 have the same base address declaration, i.e. when their
5257 ref_base_address is the same. */
5258
5259 bool
5260 have_similar_memory_accesses (gimple s1, gimple s2)
5261 {
5262 bool res = false;
5263 unsigned i, j;
5264 VEC (data_ref_loc, heap) *refs1, *refs2;
5265 data_ref_loc *ref1, *ref2;
5266
5267 get_references_in_stmt (s1, &refs1);
5268 get_references_in_stmt (s2, &refs2);
5269
5270 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5271 {
5272 tree base1 = ref_base_address (s1, ref1);
5273
5274 if (base1)
5275 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5276 if (base1 == ref_base_address (s2, ref2))
5277 {
5278 res = true;
5279 goto end;
5280 }
5281 }
5282
5283 end:
5284 VEC_free (data_ref_loc, heap, refs1);
5285 VEC_free (data_ref_loc, heap, refs2);
5286 return res;
5287 }
5288
5289 /* Helper function for the hashtab. */
5290
5291 static int
5292 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5293 {
5294 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5295 CONST_CAST_GIMPLE ((const_gimple) s2));
5296 }
5297
5298 /* Helper function for the hashtab. */
5299
5300 static hashval_t
5301 ref_base_address_1 (const void *s)
5302 {
5303 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5304 unsigned i;
5305 VEC (data_ref_loc, heap) *refs;
5306 data_ref_loc *ref;
5307 hashval_t res = 0;
5308
5309 get_references_in_stmt (stmt, &refs);
5310
5311 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5312 if (!ref->is_read)
5313 {
5314 res = htab_hash_pointer (ref_base_address (stmt, ref));
5315 break;
5316 }
5317
5318 VEC_free (data_ref_loc, heap, refs);
5319 return res;
5320 }
5321
5322 /* Try to remove duplicated write data references from STMTS. */
5323
5324 void
5325 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5326 {
5327 unsigned i;
5328 gimple stmt;
5329 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5330 have_similar_memory_accesses_1, NULL);
5331
5332 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5333 {
5334 void **slot;
5335
5336 slot = htab_find_slot (seen, stmt, INSERT);
5337
5338 if (*slot)
5339 VEC_ordered_remove (gimple, *stmts, i);
5340 else
5341 {
5342 *slot = (void *) stmt;
5343 i++;
5344 }
5345 }
5346
5347 htab_delete (seen);
5348 }
5349
5350 /* Returns the index of PARAMETER in the parameters vector of the
5351 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5352
5353 int
5354 access_matrix_get_index_for_parameter (tree parameter,
5355 struct access_matrix *access_matrix)
5356 {
5357 int i;
5358 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5359 tree lambda_parameter;
5360
5361 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5362 if (lambda_parameter == parameter)
5363 return i + AM_NB_INDUCTION_VARS (access_matrix);
5364
5365 return -1;
5366 }