Do not include unnecessary .h files.
[gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
31
32 The goals of this analysis are:
33
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
37
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
40
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
46
47 - to define a knowledge base for storing the data dependence
48 information,
49
50 - to define an interface to access this data.
51
52
53 Definitions:
54
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
64
65 References:
66
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
73
74
75 */
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87
88 static struct datadep_stats
89 {
90 int num_dependence_tests;
91 int num_dependence_dependent;
92 int num_dependence_independent;
93 int num_dependence_undetermined;
94
95 int num_subscript_tests;
96 int num_subscript_undetermined;
97 int num_same_subscript_function;
98
99 int num_ziv;
100 int num_ziv_independent;
101 int num_ziv_dependent;
102 int num_ziv_unimplemented;
103
104 int num_siv;
105 int num_siv_independent;
106 int num_siv_dependent;
107 int num_siv_unimplemented;
108
109 int num_miv;
110 int num_miv_independent;
111 int num_miv_dependent;
112 int num_miv_unimplemented;
113 } dependence_stats;
114
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
116 struct data_reference *,
117 struct data_reference *,
118 struct loop *);
119 /* Returns true iff A divides B. */
120
121 static inline bool
122 tree_fold_divides_p (const_tree a, const_tree b)
123 {
124 gcc_assert (TREE_CODE (a) == INTEGER_CST);
125 gcc_assert (TREE_CODE (b) == INTEGER_CST);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
127 }
128
129 /* Returns true iff A divides B. */
130
131 static inline bool
132 int_divides_p (int a, int b)
133 {
134 return ((b % a) == 0);
135 }
136
137 \f
138
139 /* Dump into FILE all the data references from DATAREFS. */
140
141 void
142 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
143 {
144 unsigned int i;
145 struct data_reference *dr;
146
147 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
148 dump_data_reference (file, dr);
149 }
150
151 /* Dump into STDERR all the data references from DATAREFS. */
152
153 DEBUG_FUNCTION void
154 debug_data_references (VEC (data_reference_p, heap) *datarefs)
155 {
156 dump_data_references (stderr, datarefs);
157 }
158
159 /* Dump to STDERR all the dependence relations from DDRS. */
160
161 DEBUG_FUNCTION void
162 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
163 {
164 dump_data_dependence_relations (stderr, ddrs);
165 }
166
167 /* Dump into FILE all the dependence relations from DDRS. */
168
169 void
170 dump_data_dependence_relations (FILE *file,
171 VEC (ddr_p, heap) *ddrs)
172 {
173 unsigned int i;
174 struct data_dependence_relation *ddr;
175
176 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
177 dump_data_dependence_relation (file, ddr);
178 }
179
180 /* Print to STDERR the data_reference DR. */
181
182 DEBUG_FUNCTION void
183 debug_data_reference (struct data_reference *dr)
184 {
185 dump_data_reference (stderr, dr);
186 }
187
188 /* Dump function for a DATA_REFERENCE structure. */
189
190 void
191 dump_data_reference (FILE *outf,
192 struct data_reference *dr)
193 {
194 unsigned int i;
195
196 fprintf (outf, "#(Data Ref: \n# stmt: ");
197 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
198 fprintf (outf, "# ref: ");
199 print_generic_stmt (outf, DR_REF (dr), 0);
200 fprintf (outf, "# base_object: ");
201 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
202
203 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
204 {
205 fprintf (outf, "# Access function %d: ", i);
206 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
207 }
208 fprintf (outf, "#)\n");
209 }
210
211 /* Dumps the affine function described by FN to the file OUTF. */
212
213 static void
214 dump_affine_function (FILE *outf, affine_fn fn)
215 {
216 unsigned i;
217 tree coef;
218
219 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
220 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
221 {
222 fprintf (outf, " + ");
223 print_generic_expr (outf, coef, TDF_SLIM);
224 fprintf (outf, " * x_%u", i);
225 }
226 }
227
228 /* Dumps the conflict function CF to the file OUTF. */
229
230 static void
231 dump_conflict_function (FILE *outf, conflict_function *cf)
232 {
233 unsigned i;
234
235 if (cf->n == NO_DEPENDENCE)
236 fprintf (outf, "no dependence\n");
237 else if (cf->n == NOT_KNOWN)
238 fprintf (outf, "not known\n");
239 else
240 {
241 for (i = 0; i < cf->n; i++)
242 {
243 fprintf (outf, "[");
244 dump_affine_function (outf, cf->fns[i]);
245 fprintf (outf, "]\n");
246 }
247 }
248 }
249
250 /* Dump function for a SUBSCRIPT structure. */
251
252 void
253 dump_subscript (FILE *outf, struct subscript *subscript)
254 {
255 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
256
257 fprintf (outf, "\n (subscript \n");
258 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
259 dump_conflict_function (outf, cf);
260 if (CF_NONTRIVIAL_P (cf))
261 {
262 tree last_iteration = SUB_LAST_CONFLICT (subscript);
263 fprintf (outf, " last_conflict: ");
264 print_generic_stmt (outf, last_iteration, 0);
265 }
266
267 cf = SUB_CONFLICTS_IN_B (subscript);
268 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
269 dump_conflict_function (outf, cf);
270 if (CF_NONTRIVIAL_P (cf))
271 {
272 tree last_iteration = SUB_LAST_CONFLICT (subscript);
273 fprintf (outf, " last_conflict: ");
274 print_generic_stmt (outf, last_iteration, 0);
275 }
276
277 fprintf (outf, " (Subscript distance: ");
278 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
279 fprintf (outf, " )\n");
280 fprintf (outf, " )\n");
281 }
282
283 /* Print the classic direction vector DIRV to OUTF. */
284
285 void
286 print_direction_vector (FILE *outf,
287 lambda_vector dirv,
288 int length)
289 {
290 int eq;
291
292 for (eq = 0; eq < length; eq++)
293 {
294 enum data_dependence_direction dir = ((enum data_dependence_direction)
295 dirv[eq]);
296
297 switch (dir)
298 {
299 case dir_positive:
300 fprintf (outf, " +");
301 break;
302 case dir_negative:
303 fprintf (outf, " -");
304 break;
305 case dir_equal:
306 fprintf (outf, " =");
307 break;
308 case dir_positive_or_equal:
309 fprintf (outf, " +=");
310 break;
311 case dir_positive_or_negative:
312 fprintf (outf, " +-");
313 break;
314 case dir_negative_or_equal:
315 fprintf (outf, " -=");
316 break;
317 case dir_star:
318 fprintf (outf, " *");
319 break;
320 default:
321 fprintf (outf, "indep");
322 break;
323 }
324 }
325 fprintf (outf, "\n");
326 }
327
328 /* Print a vector of direction vectors. */
329
330 void
331 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
332 int length)
333 {
334 unsigned j;
335 lambda_vector v;
336
337 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
338 print_direction_vector (outf, v, length);
339 }
340
341 /* Print a vector of distance vectors. */
342
343 void
344 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
345 int length)
346 {
347 unsigned j;
348 lambda_vector v;
349
350 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
351 print_lambda_vector (outf, v, length);
352 }
353
354 /* Debug version. */
355
356 DEBUG_FUNCTION void
357 debug_data_dependence_relation (struct data_dependence_relation *ddr)
358 {
359 dump_data_dependence_relation (stderr, ddr);
360 }
361
362 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
363
364 void
365 dump_data_dependence_relation (FILE *outf,
366 struct data_dependence_relation *ddr)
367 {
368 struct data_reference *dra, *drb;
369
370 fprintf (outf, "(Data Dep: \n");
371
372 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
373 {
374 if (ddr)
375 {
376 dra = DDR_A (ddr);
377 drb = DDR_B (ddr);
378 if (dra)
379 dump_data_reference (outf, dra);
380 else
381 fprintf (outf, " (nil)\n");
382 if (drb)
383 dump_data_reference (outf, drb);
384 else
385 fprintf (outf, " (nil)\n");
386 }
387 fprintf (outf, " (don't know)\n)\n");
388 return;
389 }
390
391 dra = DDR_A (ddr);
392 drb = DDR_B (ddr);
393 dump_data_reference (outf, dra);
394 dump_data_reference (outf, drb);
395
396 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
397 fprintf (outf, " (no dependence)\n");
398
399 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
400 {
401 unsigned int i;
402 struct loop *loopi;
403
404 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
405 {
406 fprintf (outf, " access_fn_A: ");
407 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
408 fprintf (outf, " access_fn_B: ");
409 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
410 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
411 }
412
413 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
414 fprintf (outf, " loop nest: (");
415 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
416 fprintf (outf, "%d ", loopi->num);
417 fprintf (outf, ")\n");
418
419 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
420 {
421 fprintf (outf, " distance_vector: ");
422 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
423 DDR_NB_LOOPS (ddr));
424 }
425
426 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
427 {
428 fprintf (outf, " direction_vector: ");
429 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
430 DDR_NB_LOOPS (ddr));
431 }
432 }
433
434 fprintf (outf, ")\n");
435 }
436
437 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
438
439 void
440 dump_data_dependence_direction (FILE *file,
441 enum data_dependence_direction dir)
442 {
443 switch (dir)
444 {
445 case dir_positive:
446 fprintf (file, "+");
447 break;
448
449 case dir_negative:
450 fprintf (file, "-");
451 break;
452
453 case dir_equal:
454 fprintf (file, "=");
455 break;
456
457 case dir_positive_or_negative:
458 fprintf (file, "+-");
459 break;
460
461 case dir_positive_or_equal:
462 fprintf (file, "+=");
463 break;
464
465 case dir_negative_or_equal:
466 fprintf (file, "-=");
467 break;
468
469 case dir_star:
470 fprintf (file, "*");
471 break;
472
473 default:
474 break;
475 }
476 }
477
478 /* Dumps the distance and direction vectors in FILE. DDRS contains
479 the dependence relations, and VECT_SIZE is the size of the
480 dependence vectors, or in other words the number of loops in the
481 considered nest. */
482
483 void
484 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
485 {
486 unsigned int i, j;
487 struct data_dependence_relation *ddr;
488 lambda_vector v;
489
490 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
491 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
492 {
493 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
494 {
495 fprintf (file, "DISTANCE_V (");
496 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
497 fprintf (file, ")\n");
498 }
499
500 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
501 {
502 fprintf (file, "DIRECTION_V (");
503 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
504 fprintf (file, ")\n");
505 }
506 }
507
508 fprintf (file, "\n\n");
509 }
510
511 /* Dumps the data dependence relations DDRS in FILE. */
512
513 void
514 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
515 {
516 unsigned int i;
517 struct data_dependence_relation *ddr;
518
519 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
520 dump_data_dependence_relation (file, ddr);
521
522 fprintf (file, "\n\n");
523 }
524
525 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
526 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
527 constant of type ssizetype, and returns true. If we cannot do this
528 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
529 is returned. */
530
531 static bool
532 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
533 tree *var, tree *off)
534 {
535 tree var0, var1;
536 tree off0, off1;
537 enum tree_code ocode = code;
538
539 *var = NULL_TREE;
540 *off = NULL_TREE;
541
542 switch (code)
543 {
544 case INTEGER_CST:
545 *var = build_int_cst (type, 0);
546 *off = fold_convert (ssizetype, op0);
547 return true;
548
549 case POINTER_PLUS_EXPR:
550 ocode = PLUS_EXPR;
551 /* FALLTHROUGH */
552 case PLUS_EXPR:
553 case MINUS_EXPR:
554 split_constant_offset (op0, &var0, &off0);
555 split_constant_offset (op1, &var1, &off1);
556 *var = fold_build2 (code, type, var0, var1);
557 *off = size_binop (ocode, off0, off1);
558 return true;
559
560 case MULT_EXPR:
561 if (TREE_CODE (op1) != INTEGER_CST)
562 return false;
563
564 split_constant_offset (op0, &var0, &off0);
565 *var = fold_build2 (MULT_EXPR, type, var0, op1);
566 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
567 return true;
568
569 case ADDR_EXPR:
570 {
571 tree base, poffset;
572 HOST_WIDE_INT pbitsize, pbitpos;
573 enum machine_mode pmode;
574 int punsignedp, pvolatilep;
575
576 op0 = TREE_OPERAND (op0, 0);
577 if (!handled_component_p (op0))
578 return false;
579
580 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
581 &pmode, &punsignedp, &pvolatilep, false);
582
583 if (pbitpos % BITS_PER_UNIT != 0)
584 return false;
585 base = build_fold_addr_expr (base);
586 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
587
588 if (poffset)
589 {
590 split_constant_offset (poffset, &poffset, &off1);
591 off0 = size_binop (PLUS_EXPR, off0, off1);
592 if (POINTER_TYPE_P (TREE_TYPE (base)))
593 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
594 base, fold_convert (sizetype, poffset));
595 else
596 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
597 fold_convert (TREE_TYPE (base), poffset));
598 }
599
600 var0 = fold_convert (type, base);
601
602 /* If variable length types are involved, punt, otherwise casts
603 might be converted into ARRAY_REFs in gimplify_conversion.
604 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
605 possibly no longer appears in current GIMPLE, might resurface.
606 This perhaps could run
607 if (CONVERT_EXPR_P (var0))
608 {
609 gimplify_conversion (&var0);
610 // Attempt to fill in any within var0 found ARRAY_REF's
611 // element size from corresponding op embedded ARRAY_REF,
612 // if unsuccessful, just punt.
613 } */
614 while (POINTER_TYPE_P (type))
615 type = TREE_TYPE (type);
616 if (int_size_in_bytes (type) < 0)
617 return false;
618
619 *var = var0;
620 *off = off0;
621 return true;
622 }
623
624 case SSA_NAME:
625 {
626 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
627 enum tree_code subcode;
628
629 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
630 return false;
631
632 var0 = gimple_assign_rhs1 (def_stmt);
633 subcode = gimple_assign_rhs_code (def_stmt);
634 var1 = gimple_assign_rhs2 (def_stmt);
635
636 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
637 }
638 CASE_CONVERT:
639 {
640 /* We must not introduce undefined overflow, and we must not change the value.
641 Hence we're okay if the inner type doesn't overflow to start with
642 (pointer or signed), the outer type also is an integer or pointer
643 and the outer precision is at least as large as the inner. */
644 tree itype = TREE_TYPE (op0);
645 if ((POINTER_TYPE_P (itype)
646 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
647 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
648 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
649 {
650 split_constant_offset (op0, &var0, off);
651 *var = fold_convert (type, var0);
652 return true;
653 }
654 return false;
655 }
656
657 default:
658 return false;
659 }
660 }
661
662 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
663 will be ssizetype. */
664
665 void
666 split_constant_offset (tree exp, tree *var, tree *off)
667 {
668 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
669 enum tree_code code;
670
671 *var = exp;
672 *off = ssize_int (0);
673 STRIP_NOPS (exp);
674
675 if (automatically_generated_chrec_p (exp))
676 return;
677
678 otype = TREE_TYPE (exp);
679 code = TREE_CODE (exp);
680 extract_ops_from_tree (exp, &code, &op0, &op1);
681 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
682 {
683 *var = fold_convert (type, e);
684 *off = o;
685 }
686 }
687
688 /* Returns the address ADDR of an object in a canonical shape (without nop
689 casts, and with type of pointer to the object). */
690
691 static tree
692 canonicalize_base_object_address (tree addr)
693 {
694 tree orig = addr;
695
696 STRIP_NOPS (addr);
697
698 /* The base address may be obtained by casting from integer, in that case
699 keep the cast. */
700 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
701 return orig;
702
703 if (TREE_CODE (addr) != ADDR_EXPR)
704 return addr;
705
706 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
707 }
708
709 /* Analyzes the behavior of the memory reference DR in the innermost loop or
710 basic block that contains it. Returns true if analysis succeed or false
711 otherwise. */
712
713 bool
714 dr_analyze_innermost (struct data_reference *dr)
715 {
716 gimple stmt = DR_STMT (dr);
717 struct loop *loop = loop_containing_stmt (stmt);
718 tree ref = DR_REF (dr);
719 HOST_WIDE_INT pbitsize, pbitpos;
720 tree base, poffset;
721 enum machine_mode pmode;
722 int punsignedp, pvolatilep;
723 affine_iv base_iv, offset_iv;
724 tree init, dinit, step;
725 bool in_loop = (loop && loop->num);
726
727 if (dump_file && (dump_flags & TDF_DETAILS))
728 fprintf (dump_file, "analyze_innermost: ");
729
730 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
731 &pmode, &punsignedp, &pvolatilep, false);
732 gcc_assert (base != NULL_TREE);
733
734 if (pbitpos % BITS_PER_UNIT != 0)
735 {
736 if (dump_file && (dump_flags & TDF_DETAILS))
737 fprintf (dump_file, "failed: bit offset alignment.\n");
738 return false;
739 }
740
741 if (TREE_CODE (base) == MEM_REF)
742 {
743 if (!integer_zerop (TREE_OPERAND (base, 1)))
744 {
745 if (!poffset)
746 {
747 double_int moff = mem_ref_offset (base);
748 poffset = double_int_to_tree (sizetype, moff);
749 }
750 else
751 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
752 }
753 base = TREE_OPERAND (base, 0);
754 }
755 else
756 base = build_fold_addr_expr (base);
757 if (in_loop)
758 {
759 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
760 false))
761 {
762 if (dump_file && (dump_flags & TDF_DETAILS))
763 fprintf (dump_file, "failed: evolution of base is not affine.\n");
764 return false;
765 }
766 }
767 else
768 {
769 base_iv.base = base;
770 base_iv.step = ssize_int (0);
771 base_iv.no_overflow = true;
772 }
773
774 if (!poffset)
775 {
776 offset_iv.base = ssize_int (0);
777 offset_iv.step = ssize_int (0);
778 }
779 else
780 {
781 if (!in_loop)
782 {
783 offset_iv.base = poffset;
784 offset_iv.step = ssize_int (0);
785 }
786 else if (!simple_iv (loop, loop_containing_stmt (stmt),
787 poffset, &offset_iv, false))
788 {
789 if (dump_file && (dump_flags & TDF_DETAILS))
790 fprintf (dump_file, "failed: evolution of offset is not"
791 " affine.\n");
792 return false;
793 }
794 }
795
796 init = ssize_int (pbitpos / BITS_PER_UNIT);
797 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
798 init = size_binop (PLUS_EXPR, init, dinit);
799 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
800 init = size_binop (PLUS_EXPR, init, dinit);
801
802 step = size_binop (PLUS_EXPR,
803 fold_convert (ssizetype, base_iv.step),
804 fold_convert (ssizetype, offset_iv.step));
805
806 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
807
808 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
809 DR_INIT (dr) = init;
810 DR_STEP (dr) = step;
811
812 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
813
814 if (dump_file && (dump_flags & TDF_DETAILS))
815 fprintf (dump_file, "success.\n");
816
817 return true;
818 }
819
820 /* Determines the base object and the list of indices of memory reference
821 DR, analyzed in loop nest NEST. */
822
823 static void
824 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
825 {
826 gimple stmt = DR_STMT (dr);
827 struct loop *loop = loop_containing_stmt (stmt);
828 VEC (tree, heap) *access_fns = NULL;
829 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
830 tree base, off, access_fn = NULL_TREE;
831 basic_block before_loop = NULL;
832
833 if (nest)
834 before_loop = block_before_loop (nest);
835
836 while (handled_component_p (aref))
837 {
838 if (TREE_CODE (aref) == ARRAY_REF)
839 {
840 op = TREE_OPERAND (aref, 1);
841 if (nest)
842 {
843 access_fn = analyze_scalar_evolution (loop, op);
844 access_fn = instantiate_scev (before_loop, loop, access_fn);
845 VEC_safe_push (tree, heap, access_fns, access_fn);
846 }
847
848 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
849 }
850
851 aref = TREE_OPERAND (aref, 0);
852 }
853
854 if (nest
855 && (INDIRECT_REF_P (aref)
856 || TREE_CODE (aref) == MEM_REF))
857 {
858 op = TREE_OPERAND (aref, 0);
859 access_fn = analyze_scalar_evolution (loop, op);
860 access_fn = instantiate_scev (before_loop, loop, access_fn);
861 base = initial_condition (access_fn);
862 split_constant_offset (base, &base, &off);
863 if (TREE_CODE (aref) == MEM_REF)
864 off = size_binop (PLUS_EXPR, off,
865 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
866 access_fn = chrec_replace_initial_condition (access_fn,
867 fold_convert (TREE_TYPE (base), off));
868
869 TREE_OPERAND (aref, 0) = base;
870 VEC_safe_push (tree, heap, access_fns, access_fn);
871 }
872
873 if (TREE_CODE (aref) == MEM_REF)
874 TREE_OPERAND (aref, 1)
875 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
876
877 if (TREE_CODE (ref) == MEM_REF
878 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
879 && integer_zerop (TREE_OPERAND (ref, 1)))
880 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
881
882 /* For canonicalization purposes we'd like to strip all outermost
883 zero-offset component-refs.
884 ??? For now simply handle zero-index array-refs. */
885 while (TREE_CODE (ref) == ARRAY_REF
886 && integer_zerop (TREE_OPERAND (ref, 1)))
887 ref = TREE_OPERAND (ref, 0);
888
889 DR_BASE_OBJECT (dr) = ref;
890 DR_ACCESS_FNS (dr) = access_fns;
891 }
892
893 /* Extracts the alias analysis information from the memory reference DR. */
894
895 static void
896 dr_analyze_alias (struct data_reference *dr)
897 {
898 tree ref = DR_REF (dr);
899 tree base = get_base_address (ref), addr;
900
901 if (INDIRECT_REF_P (base)
902 || TREE_CODE (base) == MEM_REF)
903 {
904 addr = TREE_OPERAND (base, 0);
905 if (TREE_CODE (addr) == SSA_NAME)
906 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
907 }
908 }
909
910 /* Returns true if the address of DR is invariant. */
911
912 static bool
913 dr_address_invariant_p (struct data_reference *dr)
914 {
915 unsigned i;
916 tree idx;
917
918 FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx)
919 if (tree_contains_chrecs (idx, NULL))
920 return false;
921
922 return true;
923 }
924
925 /* Frees data reference DR. */
926
927 void
928 free_data_ref (data_reference_p dr)
929 {
930 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
931 free (dr);
932 }
933
934 /* Analyzes memory reference MEMREF accessed in STMT. The reference
935 is read if IS_READ is true, write otherwise. Returns the
936 data_reference description of MEMREF. NEST is the outermost loop of the
937 loop nest in that the reference should be analyzed. */
938
939 struct data_reference *
940 create_data_ref (struct loop *nest, tree memref, gimple stmt, bool is_read)
941 {
942 struct data_reference *dr;
943
944 if (dump_file && (dump_flags & TDF_DETAILS))
945 {
946 fprintf (dump_file, "Creating dr for ");
947 print_generic_expr (dump_file, memref, TDF_SLIM);
948 fprintf (dump_file, "\n");
949 }
950
951 dr = XCNEW (struct data_reference);
952 DR_STMT (dr) = stmt;
953 DR_REF (dr) = memref;
954 DR_IS_READ (dr) = is_read;
955
956 dr_analyze_innermost (dr);
957 dr_analyze_indices (dr, nest);
958 dr_analyze_alias (dr);
959
960 if (dump_file && (dump_flags & TDF_DETAILS))
961 {
962 fprintf (dump_file, "\tbase_address: ");
963 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
964 fprintf (dump_file, "\n\toffset from base address: ");
965 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
966 fprintf (dump_file, "\n\tconstant offset from base address: ");
967 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
968 fprintf (dump_file, "\n\tstep: ");
969 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
970 fprintf (dump_file, "\n\taligned to: ");
971 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
972 fprintf (dump_file, "\n\tbase_object: ");
973 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
974 fprintf (dump_file, "\n");
975 }
976
977 return dr;
978 }
979
980 /* Returns true if FNA == FNB. */
981
982 static bool
983 affine_function_equal_p (affine_fn fna, affine_fn fnb)
984 {
985 unsigned i, n = VEC_length (tree, fna);
986
987 if (n != VEC_length (tree, fnb))
988 return false;
989
990 for (i = 0; i < n; i++)
991 if (!operand_equal_p (VEC_index (tree, fna, i),
992 VEC_index (tree, fnb, i), 0))
993 return false;
994
995 return true;
996 }
997
998 /* If all the functions in CF are the same, returns one of them,
999 otherwise returns NULL. */
1000
1001 static affine_fn
1002 common_affine_function (conflict_function *cf)
1003 {
1004 unsigned i;
1005 affine_fn comm;
1006
1007 if (!CF_NONTRIVIAL_P (cf))
1008 return NULL;
1009
1010 comm = cf->fns[0];
1011
1012 for (i = 1; i < cf->n; i++)
1013 if (!affine_function_equal_p (comm, cf->fns[i]))
1014 return NULL;
1015
1016 return comm;
1017 }
1018
1019 /* Returns the base of the affine function FN. */
1020
1021 static tree
1022 affine_function_base (affine_fn fn)
1023 {
1024 return VEC_index (tree, fn, 0);
1025 }
1026
1027 /* Returns true if FN is a constant. */
1028
1029 static bool
1030 affine_function_constant_p (affine_fn fn)
1031 {
1032 unsigned i;
1033 tree coef;
1034
1035 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1036 if (!integer_zerop (coef))
1037 return false;
1038
1039 return true;
1040 }
1041
1042 /* Returns true if FN is the zero constant function. */
1043
1044 static bool
1045 affine_function_zero_p (affine_fn fn)
1046 {
1047 return (integer_zerop (affine_function_base (fn))
1048 && affine_function_constant_p (fn));
1049 }
1050
1051 /* Returns a signed integer type with the largest precision from TA
1052 and TB. */
1053
1054 static tree
1055 signed_type_for_types (tree ta, tree tb)
1056 {
1057 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1058 return signed_type_for (ta);
1059 else
1060 return signed_type_for (tb);
1061 }
1062
1063 /* Applies operation OP on affine functions FNA and FNB, and returns the
1064 result. */
1065
1066 static affine_fn
1067 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1068 {
1069 unsigned i, n, m;
1070 affine_fn ret;
1071 tree coef;
1072
1073 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1074 {
1075 n = VEC_length (tree, fna);
1076 m = VEC_length (tree, fnb);
1077 }
1078 else
1079 {
1080 n = VEC_length (tree, fnb);
1081 m = VEC_length (tree, fna);
1082 }
1083
1084 ret = VEC_alloc (tree, heap, m);
1085 for (i = 0; i < n; i++)
1086 {
1087 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1088 TREE_TYPE (VEC_index (tree, fnb, i)));
1089
1090 VEC_quick_push (tree, ret,
1091 fold_build2 (op, type,
1092 VEC_index (tree, fna, i),
1093 VEC_index (tree, fnb, i)));
1094 }
1095
1096 for (; VEC_iterate (tree, fna, i, coef); i++)
1097 VEC_quick_push (tree, ret,
1098 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1099 coef, integer_zero_node));
1100 for (; VEC_iterate (tree, fnb, i, coef); i++)
1101 VEC_quick_push (tree, ret,
1102 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1103 integer_zero_node, coef));
1104
1105 return ret;
1106 }
1107
1108 /* Returns the sum of affine functions FNA and FNB. */
1109
1110 static affine_fn
1111 affine_fn_plus (affine_fn fna, affine_fn fnb)
1112 {
1113 return affine_fn_op (PLUS_EXPR, fna, fnb);
1114 }
1115
1116 /* Returns the difference of affine functions FNA and FNB. */
1117
1118 static affine_fn
1119 affine_fn_minus (affine_fn fna, affine_fn fnb)
1120 {
1121 return affine_fn_op (MINUS_EXPR, fna, fnb);
1122 }
1123
1124 /* Frees affine function FN. */
1125
1126 static void
1127 affine_fn_free (affine_fn fn)
1128 {
1129 VEC_free (tree, heap, fn);
1130 }
1131
1132 /* Determine for each subscript in the data dependence relation DDR
1133 the distance. */
1134
1135 static void
1136 compute_subscript_distance (struct data_dependence_relation *ddr)
1137 {
1138 conflict_function *cf_a, *cf_b;
1139 affine_fn fn_a, fn_b, diff;
1140
1141 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1142 {
1143 unsigned int i;
1144
1145 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1146 {
1147 struct subscript *subscript;
1148
1149 subscript = DDR_SUBSCRIPT (ddr, i);
1150 cf_a = SUB_CONFLICTS_IN_A (subscript);
1151 cf_b = SUB_CONFLICTS_IN_B (subscript);
1152
1153 fn_a = common_affine_function (cf_a);
1154 fn_b = common_affine_function (cf_b);
1155 if (!fn_a || !fn_b)
1156 {
1157 SUB_DISTANCE (subscript) = chrec_dont_know;
1158 return;
1159 }
1160 diff = affine_fn_minus (fn_a, fn_b);
1161
1162 if (affine_function_constant_p (diff))
1163 SUB_DISTANCE (subscript) = affine_function_base (diff);
1164 else
1165 SUB_DISTANCE (subscript) = chrec_dont_know;
1166
1167 affine_fn_free (diff);
1168 }
1169 }
1170 }
1171
1172 /* Returns the conflict function for "unknown". */
1173
1174 static conflict_function *
1175 conflict_fn_not_known (void)
1176 {
1177 conflict_function *fn = XCNEW (conflict_function);
1178 fn->n = NOT_KNOWN;
1179
1180 return fn;
1181 }
1182
1183 /* Returns the conflict function for "independent". */
1184
1185 static conflict_function *
1186 conflict_fn_no_dependence (void)
1187 {
1188 conflict_function *fn = XCNEW (conflict_function);
1189 fn->n = NO_DEPENDENCE;
1190
1191 return fn;
1192 }
1193
1194 /* Returns true if the address of OBJ is invariant in LOOP. */
1195
1196 static bool
1197 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1198 {
1199 while (handled_component_p (obj))
1200 {
1201 if (TREE_CODE (obj) == ARRAY_REF)
1202 {
1203 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1204 need to check the stride and the lower bound of the reference. */
1205 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1206 loop->num)
1207 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1208 loop->num))
1209 return false;
1210 }
1211 else if (TREE_CODE (obj) == COMPONENT_REF)
1212 {
1213 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1214 loop->num))
1215 return false;
1216 }
1217 obj = TREE_OPERAND (obj, 0);
1218 }
1219
1220 if (!INDIRECT_REF_P (obj)
1221 && TREE_CODE (obj) != MEM_REF)
1222 return true;
1223
1224 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1225 loop->num);
1226 }
1227
1228 /* Returns false if we can prove that data references A and B do not alias,
1229 true otherwise. */
1230
1231 bool
1232 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1233 {
1234 tree addr_a = DR_BASE_OBJECT (a);
1235 tree addr_b = DR_BASE_OBJECT (b);
1236
1237 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1238 return refs_output_dependent_p (addr_a, addr_b);
1239 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1240 return refs_anti_dependent_p (addr_a, addr_b);
1241 return refs_may_alias_p (addr_a, addr_b);
1242 }
1243
1244 static void compute_self_dependence (struct data_dependence_relation *);
1245
1246 /* Initialize a data dependence relation between data accesses A and
1247 B. NB_LOOPS is the number of loops surrounding the references: the
1248 size of the classic distance/direction vectors. */
1249
1250 static struct data_dependence_relation *
1251 initialize_data_dependence_relation (struct data_reference *a,
1252 struct data_reference *b,
1253 VEC (loop_p, heap) *loop_nest)
1254 {
1255 struct data_dependence_relation *res;
1256 unsigned int i;
1257
1258 res = XNEW (struct data_dependence_relation);
1259 DDR_A (res) = a;
1260 DDR_B (res) = b;
1261 DDR_LOOP_NEST (res) = NULL;
1262 DDR_REVERSED_P (res) = false;
1263 DDR_SUBSCRIPTS (res) = NULL;
1264 DDR_DIR_VECTS (res) = NULL;
1265 DDR_DIST_VECTS (res) = NULL;
1266
1267 if (a == NULL || b == NULL)
1268 {
1269 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1270 return res;
1271 }
1272
1273 /* If the data references do not alias, then they are independent. */
1274 if (!dr_may_alias_p (a, b))
1275 {
1276 DDR_ARE_DEPENDENT (res) = chrec_known;
1277 return res;
1278 }
1279
1280 /* When the references are exactly the same, don't spend time doing
1281 the data dependence tests, just initialize the ddr and return. */
1282 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1283 {
1284 DDR_AFFINE_P (res) = true;
1285 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1286 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1287 DDR_LOOP_NEST (res) = loop_nest;
1288 DDR_INNER_LOOP (res) = 0;
1289 DDR_SELF_REFERENCE (res) = true;
1290 compute_self_dependence (res);
1291 return res;
1292 }
1293
1294 /* If the references do not access the same object, we do not know
1295 whether they alias or not. */
1296 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1297 {
1298 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1299 return res;
1300 }
1301
1302 /* If the base of the object is not invariant in the loop nest, we cannot
1303 analyze it. TODO -- in fact, it would suffice to record that there may
1304 be arbitrary dependences in the loops where the base object varies. */
1305 if (loop_nest
1306 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1307 DR_BASE_OBJECT (a)))
1308 {
1309 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1310 return res;
1311 }
1312
1313 /* If the number of dimensions of the access to not agree we can have
1314 a pointer access to a component of the array element type and an
1315 array access while the base-objects are still the same. Punt. */
1316 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1317 {
1318 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1319 return res;
1320 }
1321
1322 DDR_AFFINE_P (res) = true;
1323 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1324 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1325 DDR_LOOP_NEST (res) = loop_nest;
1326 DDR_INNER_LOOP (res) = 0;
1327 DDR_SELF_REFERENCE (res) = false;
1328
1329 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1330 {
1331 struct subscript *subscript;
1332
1333 subscript = XNEW (struct subscript);
1334 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1335 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1336 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1337 SUB_DISTANCE (subscript) = chrec_dont_know;
1338 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1339 }
1340
1341 return res;
1342 }
1343
1344 /* Frees memory used by the conflict function F. */
1345
1346 static void
1347 free_conflict_function (conflict_function *f)
1348 {
1349 unsigned i;
1350
1351 if (CF_NONTRIVIAL_P (f))
1352 {
1353 for (i = 0; i < f->n; i++)
1354 affine_fn_free (f->fns[i]);
1355 }
1356 free (f);
1357 }
1358
1359 /* Frees memory used by SUBSCRIPTS. */
1360
1361 static void
1362 free_subscripts (VEC (subscript_p, heap) *subscripts)
1363 {
1364 unsigned i;
1365 subscript_p s;
1366
1367 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1368 {
1369 free_conflict_function (s->conflicting_iterations_in_a);
1370 free_conflict_function (s->conflicting_iterations_in_b);
1371 free (s);
1372 }
1373 VEC_free (subscript_p, heap, subscripts);
1374 }
1375
1376 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1377 description. */
1378
1379 static inline void
1380 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1381 tree chrec)
1382 {
1383 if (dump_file && (dump_flags & TDF_DETAILS))
1384 {
1385 fprintf (dump_file, "(dependence classified: ");
1386 print_generic_expr (dump_file, chrec, 0);
1387 fprintf (dump_file, ")\n");
1388 }
1389
1390 DDR_ARE_DEPENDENT (ddr) = chrec;
1391 free_subscripts (DDR_SUBSCRIPTS (ddr));
1392 DDR_SUBSCRIPTS (ddr) = NULL;
1393 }
1394
1395 /* The dependence relation DDR cannot be represented by a distance
1396 vector. */
1397
1398 static inline void
1399 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1400 {
1401 if (dump_file && (dump_flags & TDF_DETAILS))
1402 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1403
1404 DDR_AFFINE_P (ddr) = false;
1405 }
1406
1407 \f
1408
1409 /* This section contains the classic Banerjee tests. */
1410
1411 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1412 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1413
1414 static inline bool
1415 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1416 {
1417 return (evolution_function_is_constant_p (chrec_a)
1418 && evolution_function_is_constant_p (chrec_b));
1419 }
1420
1421 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1422 variable, i.e., if the SIV (Single Index Variable) test is true. */
1423
1424 static bool
1425 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1426 {
1427 if ((evolution_function_is_constant_p (chrec_a)
1428 && evolution_function_is_univariate_p (chrec_b))
1429 || (evolution_function_is_constant_p (chrec_b)
1430 && evolution_function_is_univariate_p (chrec_a)))
1431 return true;
1432
1433 if (evolution_function_is_univariate_p (chrec_a)
1434 && evolution_function_is_univariate_p (chrec_b))
1435 {
1436 switch (TREE_CODE (chrec_a))
1437 {
1438 case POLYNOMIAL_CHREC:
1439 switch (TREE_CODE (chrec_b))
1440 {
1441 case POLYNOMIAL_CHREC:
1442 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1443 return false;
1444
1445 default:
1446 return true;
1447 }
1448
1449 default:
1450 return true;
1451 }
1452 }
1453
1454 return false;
1455 }
1456
1457 /* Creates a conflict function with N dimensions. The affine functions
1458 in each dimension follow. */
1459
1460 static conflict_function *
1461 conflict_fn (unsigned n, ...)
1462 {
1463 unsigned i;
1464 conflict_function *ret = XCNEW (conflict_function);
1465 va_list ap;
1466
1467 gcc_assert (0 < n && n <= MAX_DIM);
1468 va_start(ap, n);
1469
1470 ret->n = n;
1471 for (i = 0; i < n; i++)
1472 ret->fns[i] = va_arg (ap, affine_fn);
1473 va_end(ap);
1474
1475 return ret;
1476 }
1477
1478 /* Returns constant affine function with value CST. */
1479
1480 static affine_fn
1481 affine_fn_cst (tree cst)
1482 {
1483 affine_fn fn = VEC_alloc (tree, heap, 1);
1484 VEC_quick_push (tree, fn, cst);
1485 return fn;
1486 }
1487
1488 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1489
1490 static affine_fn
1491 affine_fn_univar (tree cst, unsigned dim, tree coef)
1492 {
1493 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1494 unsigned i;
1495
1496 gcc_assert (dim > 0);
1497 VEC_quick_push (tree, fn, cst);
1498 for (i = 1; i < dim; i++)
1499 VEC_quick_push (tree, fn, integer_zero_node);
1500 VEC_quick_push (tree, fn, coef);
1501 return fn;
1502 }
1503
1504 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1505 *OVERLAPS_B are initialized to the functions that describe the
1506 relation between the elements accessed twice by CHREC_A and
1507 CHREC_B. For k >= 0, the following property is verified:
1508
1509 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1510
1511 static void
1512 analyze_ziv_subscript (tree chrec_a,
1513 tree chrec_b,
1514 conflict_function **overlaps_a,
1515 conflict_function **overlaps_b,
1516 tree *last_conflicts)
1517 {
1518 tree type, difference;
1519 dependence_stats.num_ziv++;
1520
1521 if (dump_file && (dump_flags & TDF_DETAILS))
1522 fprintf (dump_file, "(analyze_ziv_subscript \n");
1523
1524 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1525 chrec_a = chrec_convert (type, chrec_a, NULL);
1526 chrec_b = chrec_convert (type, chrec_b, NULL);
1527 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1528
1529 switch (TREE_CODE (difference))
1530 {
1531 case INTEGER_CST:
1532 if (integer_zerop (difference))
1533 {
1534 /* The difference is equal to zero: the accessed index
1535 overlaps for each iteration in the loop. */
1536 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1537 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1538 *last_conflicts = chrec_dont_know;
1539 dependence_stats.num_ziv_dependent++;
1540 }
1541 else
1542 {
1543 /* The accesses do not overlap. */
1544 *overlaps_a = conflict_fn_no_dependence ();
1545 *overlaps_b = conflict_fn_no_dependence ();
1546 *last_conflicts = integer_zero_node;
1547 dependence_stats.num_ziv_independent++;
1548 }
1549 break;
1550
1551 default:
1552 /* We're not sure whether the indexes overlap. For the moment,
1553 conservatively answer "don't know". */
1554 if (dump_file && (dump_flags & TDF_DETAILS))
1555 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1556
1557 *overlaps_a = conflict_fn_not_known ();
1558 *overlaps_b = conflict_fn_not_known ();
1559 *last_conflicts = chrec_dont_know;
1560 dependence_stats.num_ziv_unimplemented++;
1561 break;
1562 }
1563
1564 if (dump_file && (dump_flags & TDF_DETAILS))
1565 fprintf (dump_file, ")\n");
1566 }
1567
1568 /* Sets NIT to the estimated number of executions of the statements in
1569 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1570 large as the number of iterations. If we have no reliable estimate,
1571 the function returns false, otherwise returns true. */
1572
1573 bool
1574 estimated_loop_iterations (struct loop *loop, bool conservative,
1575 double_int *nit)
1576 {
1577 estimate_numbers_of_iterations_loop (loop, true);
1578 if (conservative)
1579 {
1580 if (!loop->any_upper_bound)
1581 return false;
1582
1583 *nit = loop->nb_iterations_upper_bound;
1584 }
1585 else
1586 {
1587 if (!loop->any_estimate)
1588 return false;
1589
1590 *nit = loop->nb_iterations_estimate;
1591 }
1592
1593 return true;
1594 }
1595
1596 /* Similar to estimated_loop_iterations, but returns the estimate only
1597 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1598 on the number of iterations of LOOP could not be derived, returns -1. */
1599
1600 HOST_WIDE_INT
1601 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1602 {
1603 double_int nit;
1604 HOST_WIDE_INT hwi_nit;
1605
1606 if (!estimated_loop_iterations (loop, conservative, &nit))
1607 return -1;
1608
1609 if (!double_int_fits_in_shwi_p (nit))
1610 return -1;
1611 hwi_nit = double_int_to_shwi (nit);
1612
1613 return hwi_nit < 0 ? -1 : hwi_nit;
1614 }
1615
1616 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1617 and only if it fits to the int type. If this is not the case, or the
1618 estimate on the number of iterations of LOOP could not be derived, returns
1619 chrec_dont_know. */
1620
1621 static tree
1622 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1623 {
1624 double_int nit;
1625 tree type;
1626
1627 if (!estimated_loop_iterations (loop, conservative, &nit))
1628 return chrec_dont_know;
1629
1630 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1631 if (!double_int_fits_to_tree_p (type, nit))
1632 return chrec_dont_know;
1633
1634 return double_int_to_tree (type, nit);
1635 }
1636
1637 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1638 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1639 *OVERLAPS_B are initialized to the functions that describe the
1640 relation between the elements accessed twice by CHREC_A and
1641 CHREC_B. For k >= 0, the following property is verified:
1642
1643 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1644
1645 static void
1646 analyze_siv_subscript_cst_affine (tree chrec_a,
1647 tree chrec_b,
1648 conflict_function **overlaps_a,
1649 conflict_function **overlaps_b,
1650 tree *last_conflicts)
1651 {
1652 bool value0, value1, value2;
1653 tree type, difference, tmp;
1654
1655 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1656 chrec_a = chrec_convert (type, chrec_a, NULL);
1657 chrec_b = chrec_convert (type, chrec_b, NULL);
1658 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1659
1660 if (!chrec_is_positive (initial_condition (difference), &value0))
1661 {
1662 if (dump_file && (dump_flags & TDF_DETAILS))
1663 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1664
1665 dependence_stats.num_siv_unimplemented++;
1666 *overlaps_a = conflict_fn_not_known ();
1667 *overlaps_b = conflict_fn_not_known ();
1668 *last_conflicts = chrec_dont_know;
1669 return;
1670 }
1671 else
1672 {
1673 if (value0 == false)
1674 {
1675 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1676 {
1677 if (dump_file && (dump_flags & TDF_DETAILS))
1678 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1679
1680 *overlaps_a = conflict_fn_not_known ();
1681 *overlaps_b = conflict_fn_not_known ();
1682 *last_conflicts = chrec_dont_know;
1683 dependence_stats.num_siv_unimplemented++;
1684 return;
1685 }
1686 else
1687 {
1688 if (value1 == true)
1689 {
1690 /* Example:
1691 chrec_a = 12
1692 chrec_b = {10, +, 1}
1693 */
1694
1695 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1696 {
1697 HOST_WIDE_INT numiter;
1698 struct loop *loop = get_chrec_loop (chrec_b);
1699
1700 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1701 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1702 fold_build1 (ABS_EXPR, type, difference),
1703 CHREC_RIGHT (chrec_b));
1704 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1705 *last_conflicts = integer_one_node;
1706
1707
1708 /* Perform weak-zero siv test to see if overlap is
1709 outside the loop bounds. */
1710 numiter = estimated_loop_iterations_int (loop, false);
1711
1712 if (numiter >= 0
1713 && compare_tree_int (tmp, numiter) > 0)
1714 {
1715 free_conflict_function (*overlaps_a);
1716 free_conflict_function (*overlaps_b);
1717 *overlaps_a = conflict_fn_no_dependence ();
1718 *overlaps_b = conflict_fn_no_dependence ();
1719 *last_conflicts = integer_zero_node;
1720 dependence_stats.num_siv_independent++;
1721 return;
1722 }
1723 dependence_stats.num_siv_dependent++;
1724 return;
1725 }
1726
1727 /* When the step does not divide the difference, there are
1728 no overlaps. */
1729 else
1730 {
1731 *overlaps_a = conflict_fn_no_dependence ();
1732 *overlaps_b = conflict_fn_no_dependence ();
1733 *last_conflicts = integer_zero_node;
1734 dependence_stats.num_siv_independent++;
1735 return;
1736 }
1737 }
1738
1739 else
1740 {
1741 /* Example:
1742 chrec_a = 12
1743 chrec_b = {10, +, -1}
1744
1745 In this case, chrec_a will not overlap with chrec_b. */
1746 *overlaps_a = conflict_fn_no_dependence ();
1747 *overlaps_b = conflict_fn_no_dependence ();
1748 *last_conflicts = integer_zero_node;
1749 dependence_stats.num_siv_independent++;
1750 return;
1751 }
1752 }
1753 }
1754 else
1755 {
1756 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1757 {
1758 if (dump_file && (dump_flags & TDF_DETAILS))
1759 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1760
1761 *overlaps_a = conflict_fn_not_known ();
1762 *overlaps_b = conflict_fn_not_known ();
1763 *last_conflicts = chrec_dont_know;
1764 dependence_stats.num_siv_unimplemented++;
1765 return;
1766 }
1767 else
1768 {
1769 if (value2 == false)
1770 {
1771 /* Example:
1772 chrec_a = 3
1773 chrec_b = {10, +, -1}
1774 */
1775 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1776 {
1777 HOST_WIDE_INT numiter;
1778 struct loop *loop = get_chrec_loop (chrec_b);
1779
1780 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1781 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1782 CHREC_RIGHT (chrec_b));
1783 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1784 *last_conflicts = integer_one_node;
1785
1786 /* Perform weak-zero siv test to see if overlap is
1787 outside the loop bounds. */
1788 numiter = estimated_loop_iterations_int (loop, false);
1789
1790 if (numiter >= 0
1791 && compare_tree_int (tmp, numiter) > 0)
1792 {
1793 free_conflict_function (*overlaps_a);
1794 free_conflict_function (*overlaps_b);
1795 *overlaps_a = conflict_fn_no_dependence ();
1796 *overlaps_b = conflict_fn_no_dependence ();
1797 *last_conflicts = integer_zero_node;
1798 dependence_stats.num_siv_independent++;
1799 return;
1800 }
1801 dependence_stats.num_siv_dependent++;
1802 return;
1803 }
1804
1805 /* When the step does not divide the difference, there
1806 are no overlaps. */
1807 else
1808 {
1809 *overlaps_a = conflict_fn_no_dependence ();
1810 *overlaps_b = conflict_fn_no_dependence ();
1811 *last_conflicts = integer_zero_node;
1812 dependence_stats.num_siv_independent++;
1813 return;
1814 }
1815 }
1816 else
1817 {
1818 /* Example:
1819 chrec_a = 3
1820 chrec_b = {4, +, 1}
1821
1822 In this case, chrec_a will not overlap with chrec_b. */
1823 *overlaps_a = conflict_fn_no_dependence ();
1824 *overlaps_b = conflict_fn_no_dependence ();
1825 *last_conflicts = integer_zero_node;
1826 dependence_stats.num_siv_independent++;
1827 return;
1828 }
1829 }
1830 }
1831 }
1832 }
1833
1834 /* Helper recursive function for initializing the matrix A. Returns
1835 the initial value of CHREC. */
1836
1837 static tree
1838 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1839 {
1840 gcc_assert (chrec);
1841
1842 switch (TREE_CODE (chrec))
1843 {
1844 case POLYNOMIAL_CHREC:
1845 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1846
1847 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1848 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1849
1850 case PLUS_EXPR:
1851 case MULT_EXPR:
1852 case MINUS_EXPR:
1853 {
1854 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1855 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1856
1857 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1858 }
1859
1860 case NOP_EXPR:
1861 {
1862 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1863 return chrec_convert (chrec_type (chrec), op, NULL);
1864 }
1865
1866 case BIT_NOT_EXPR:
1867 {
1868 /* Handle ~X as -1 - X. */
1869 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1870 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1871 build_int_cst (TREE_TYPE (chrec), -1), op);
1872 }
1873
1874 case INTEGER_CST:
1875 return chrec;
1876
1877 default:
1878 gcc_unreachable ();
1879 return NULL_TREE;
1880 }
1881 }
1882
1883 #define FLOOR_DIV(x,y) ((x) / (y))
1884
1885 /* Solves the special case of the Diophantine equation:
1886 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1887
1888 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1889 number of iterations that loops X and Y run. The overlaps will be
1890 constructed as evolutions in dimension DIM. */
1891
1892 static void
1893 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1894 affine_fn *overlaps_a,
1895 affine_fn *overlaps_b,
1896 tree *last_conflicts, int dim)
1897 {
1898 if (((step_a > 0 && step_b > 0)
1899 || (step_a < 0 && step_b < 0)))
1900 {
1901 int step_overlaps_a, step_overlaps_b;
1902 int gcd_steps_a_b, last_conflict, tau2;
1903
1904 gcd_steps_a_b = gcd (step_a, step_b);
1905 step_overlaps_a = step_b / gcd_steps_a_b;
1906 step_overlaps_b = step_a / gcd_steps_a_b;
1907
1908 if (niter > 0)
1909 {
1910 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1911 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1912 last_conflict = tau2;
1913 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1914 }
1915 else
1916 *last_conflicts = chrec_dont_know;
1917
1918 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1919 build_int_cst (NULL_TREE,
1920 step_overlaps_a));
1921 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1922 build_int_cst (NULL_TREE,
1923 step_overlaps_b));
1924 }
1925
1926 else
1927 {
1928 *overlaps_a = affine_fn_cst (integer_zero_node);
1929 *overlaps_b = affine_fn_cst (integer_zero_node);
1930 *last_conflicts = integer_zero_node;
1931 }
1932 }
1933
1934 /* Solves the special case of a Diophantine equation where CHREC_A is
1935 an affine bivariate function, and CHREC_B is an affine univariate
1936 function. For example,
1937
1938 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1939
1940 has the following overlapping functions:
1941
1942 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1943 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1944 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1945
1946 FORNOW: This is a specialized implementation for a case occurring in
1947 a common benchmark. Implement the general algorithm. */
1948
1949 static void
1950 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1951 conflict_function **overlaps_a,
1952 conflict_function **overlaps_b,
1953 tree *last_conflicts)
1954 {
1955 bool xz_p, yz_p, xyz_p;
1956 int step_x, step_y, step_z;
1957 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1958 affine_fn overlaps_a_xz, overlaps_b_xz;
1959 affine_fn overlaps_a_yz, overlaps_b_yz;
1960 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1961 affine_fn ova1, ova2, ovb;
1962 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1963
1964 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1965 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1966 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1967
1968 niter_x =
1969 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1970 false);
1971 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1972 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1973
1974 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1975 {
1976 if (dump_file && (dump_flags & TDF_DETAILS))
1977 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1978
1979 *overlaps_a = conflict_fn_not_known ();
1980 *overlaps_b = conflict_fn_not_known ();
1981 *last_conflicts = chrec_dont_know;
1982 return;
1983 }
1984
1985 niter = MIN (niter_x, niter_z);
1986 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1987 &overlaps_a_xz,
1988 &overlaps_b_xz,
1989 &last_conflicts_xz, 1);
1990 niter = MIN (niter_y, niter_z);
1991 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1992 &overlaps_a_yz,
1993 &overlaps_b_yz,
1994 &last_conflicts_yz, 2);
1995 niter = MIN (niter_x, niter_z);
1996 niter = MIN (niter_y, niter);
1997 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1998 &overlaps_a_xyz,
1999 &overlaps_b_xyz,
2000 &last_conflicts_xyz, 3);
2001
2002 xz_p = !integer_zerop (last_conflicts_xz);
2003 yz_p = !integer_zerop (last_conflicts_yz);
2004 xyz_p = !integer_zerop (last_conflicts_xyz);
2005
2006 if (xz_p || yz_p || xyz_p)
2007 {
2008 ova1 = affine_fn_cst (integer_zero_node);
2009 ova2 = affine_fn_cst (integer_zero_node);
2010 ovb = affine_fn_cst (integer_zero_node);
2011 if (xz_p)
2012 {
2013 affine_fn t0 = ova1;
2014 affine_fn t2 = ovb;
2015
2016 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2017 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2018 affine_fn_free (t0);
2019 affine_fn_free (t2);
2020 *last_conflicts = last_conflicts_xz;
2021 }
2022 if (yz_p)
2023 {
2024 affine_fn t0 = ova2;
2025 affine_fn t2 = ovb;
2026
2027 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2028 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2029 affine_fn_free (t0);
2030 affine_fn_free (t2);
2031 *last_conflicts = last_conflicts_yz;
2032 }
2033 if (xyz_p)
2034 {
2035 affine_fn t0 = ova1;
2036 affine_fn t2 = ova2;
2037 affine_fn t4 = ovb;
2038
2039 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2040 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2041 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2042 affine_fn_free (t0);
2043 affine_fn_free (t2);
2044 affine_fn_free (t4);
2045 *last_conflicts = last_conflicts_xyz;
2046 }
2047 *overlaps_a = conflict_fn (2, ova1, ova2);
2048 *overlaps_b = conflict_fn (1, ovb);
2049 }
2050 else
2051 {
2052 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2053 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2054 *last_conflicts = integer_zero_node;
2055 }
2056
2057 affine_fn_free (overlaps_a_xz);
2058 affine_fn_free (overlaps_b_xz);
2059 affine_fn_free (overlaps_a_yz);
2060 affine_fn_free (overlaps_b_yz);
2061 affine_fn_free (overlaps_a_xyz);
2062 affine_fn_free (overlaps_b_xyz);
2063 }
2064
2065 /* Determines the overlapping elements due to accesses CHREC_A and
2066 CHREC_B, that are affine functions. This function cannot handle
2067 symbolic evolution functions, ie. when initial conditions are
2068 parameters, because it uses lambda matrices of integers. */
2069
2070 static void
2071 analyze_subscript_affine_affine (tree chrec_a,
2072 tree chrec_b,
2073 conflict_function **overlaps_a,
2074 conflict_function **overlaps_b,
2075 tree *last_conflicts)
2076 {
2077 unsigned nb_vars_a, nb_vars_b, dim;
2078 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2079 lambda_matrix A, U, S;
2080 struct obstack scratch_obstack;
2081
2082 if (eq_evolutions_p (chrec_a, chrec_b))
2083 {
2084 /* The accessed index overlaps for each iteration in the
2085 loop. */
2086 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2087 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2088 *last_conflicts = chrec_dont_know;
2089 return;
2090 }
2091 if (dump_file && (dump_flags & TDF_DETAILS))
2092 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2093
2094 /* For determining the initial intersection, we have to solve a
2095 Diophantine equation. This is the most time consuming part.
2096
2097 For answering to the question: "Is there a dependence?" we have
2098 to prove that there exists a solution to the Diophantine
2099 equation, and that the solution is in the iteration domain,
2100 i.e. the solution is positive or zero, and that the solution
2101 happens before the upper bound loop.nb_iterations. Otherwise
2102 there is no dependence. This function outputs a description of
2103 the iterations that hold the intersections. */
2104
2105 nb_vars_a = nb_vars_in_chrec (chrec_a);
2106 nb_vars_b = nb_vars_in_chrec (chrec_b);
2107
2108 gcc_obstack_init (&scratch_obstack);
2109
2110 dim = nb_vars_a + nb_vars_b;
2111 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2112 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2113 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2114
2115 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2116 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2117 gamma = init_b - init_a;
2118
2119 /* Don't do all the hard work of solving the Diophantine equation
2120 when we already know the solution: for example,
2121 | {3, +, 1}_1
2122 | {3, +, 4}_2
2123 | gamma = 3 - 3 = 0.
2124 Then the first overlap occurs during the first iterations:
2125 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2126 */
2127 if (gamma == 0)
2128 {
2129 if (nb_vars_a == 1 && nb_vars_b == 1)
2130 {
2131 HOST_WIDE_INT step_a, step_b;
2132 HOST_WIDE_INT niter, niter_a, niter_b;
2133 affine_fn ova, ovb;
2134
2135 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2136 false);
2137 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2138 false);
2139 niter = MIN (niter_a, niter_b);
2140 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2141 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2142
2143 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2144 &ova, &ovb,
2145 last_conflicts, 1);
2146 *overlaps_a = conflict_fn (1, ova);
2147 *overlaps_b = conflict_fn (1, ovb);
2148 }
2149
2150 else if (nb_vars_a == 2 && nb_vars_b == 1)
2151 compute_overlap_steps_for_affine_1_2
2152 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2153
2154 else if (nb_vars_a == 1 && nb_vars_b == 2)
2155 compute_overlap_steps_for_affine_1_2
2156 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2157
2158 else
2159 {
2160 if (dump_file && (dump_flags & TDF_DETAILS))
2161 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2162 *overlaps_a = conflict_fn_not_known ();
2163 *overlaps_b = conflict_fn_not_known ();
2164 *last_conflicts = chrec_dont_know;
2165 }
2166 goto end_analyze_subs_aa;
2167 }
2168
2169 /* U.A = S */
2170 lambda_matrix_right_hermite (A, dim, 1, S, U);
2171
2172 if (S[0][0] < 0)
2173 {
2174 S[0][0] *= -1;
2175 lambda_matrix_row_negate (U, dim, 0);
2176 }
2177 gcd_alpha_beta = S[0][0];
2178
2179 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2180 but that is a quite strange case. Instead of ICEing, answer
2181 don't know. */
2182 if (gcd_alpha_beta == 0)
2183 {
2184 *overlaps_a = conflict_fn_not_known ();
2185 *overlaps_b = conflict_fn_not_known ();
2186 *last_conflicts = chrec_dont_know;
2187 goto end_analyze_subs_aa;
2188 }
2189
2190 /* The classic "gcd-test". */
2191 if (!int_divides_p (gcd_alpha_beta, gamma))
2192 {
2193 /* The "gcd-test" has determined that there is no integer
2194 solution, i.e. there is no dependence. */
2195 *overlaps_a = conflict_fn_no_dependence ();
2196 *overlaps_b = conflict_fn_no_dependence ();
2197 *last_conflicts = integer_zero_node;
2198 }
2199
2200 /* Both access functions are univariate. This includes SIV and MIV cases. */
2201 else if (nb_vars_a == 1 && nb_vars_b == 1)
2202 {
2203 /* Both functions should have the same evolution sign. */
2204 if (((A[0][0] > 0 && -A[1][0] > 0)
2205 || (A[0][0] < 0 && -A[1][0] < 0)))
2206 {
2207 /* The solutions are given by:
2208 |
2209 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2210 | [u21 u22] [y0]
2211
2212 For a given integer t. Using the following variables,
2213
2214 | i0 = u11 * gamma / gcd_alpha_beta
2215 | j0 = u12 * gamma / gcd_alpha_beta
2216 | i1 = u21
2217 | j1 = u22
2218
2219 the solutions are:
2220
2221 | x0 = i0 + i1 * t,
2222 | y0 = j0 + j1 * t. */
2223 HOST_WIDE_INT i0, j0, i1, j1;
2224
2225 i0 = U[0][0] * gamma / gcd_alpha_beta;
2226 j0 = U[0][1] * gamma / gcd_alpha_beta;
2227 i1 = U[1][0];
2228 j1 = U[1][1];
2229
2230 if ((i1 == 0 && i0 < 0)
2231 || (j1 == 0 && j0 < 0))
2232 {
2233 /* There is no solution.
2234 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2235 falls in here, but for the moment we don't look at the
2236 upper bound of the iteration domain. */
2237 *overlaps_a = conflict_fn_no_dependence ();
2238 *overlaps_b = conflict_fn_no_dependence ();
2239 *last_conflicts = integer_zero_node;
2240 goto end_analyze_subs_aa;
2241 }
2242
2243 if (i1 > 0 && j1 > 0)
2244 {
2245 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2246 (get_chrec_loop (chrec_a), false);
2247 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2248 (get_chrec_loop (chrec_b), false);
2249 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2250
2251 /* (X0, Y0) is a solution of the Diophantine equation:
2252 "chrec_a (X0) = chrec_b (Y0)". */
2253 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2254 CEIL (-j0, j1));
2255 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2256 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2257
2258 /* (X1, Y1) is the smallest positive solution of the eq
2259 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2260 first conflict occurs. */
2261 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2262 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2263 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2264
2265 if (niter > 0)
2266 {
2267 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2268 FLOOR_DIV (niter - j0, j1));
2269 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2270
2271 /* If the overlap occurs outside of the bounds of the
2272 loop, there is no dependence. */
2273 if (x1 >= niter || y1 >= niter)
2274 {
2275 *overlaps_a = conflict_fn_no_dependence ();
2276 *overlaps_b = conflict_fn_no_dependence ();
2277 *last_conflicts = integer_zero_node;
2278 goto end_analyze_subs_aa;
2279 }
2280 else
2281 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2282 }
2283 else
2284 *last_conflicts = chrec_dont_know;
2285
2286 *overlaps_a
2287 = conflict_fn (1,
2288 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2289 1,
2290 build_int_cst (NULL_TREE, i1)));
2291 *overlaps_b
2292 = conflict_fn (1,
2293 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2294 1,
2295 build_int_cst (NULL_TREE, j1)));
2296 }
2297 else
2298 {
2299 /* FIXME: For the moment, the upper bound of the
2300 iteration domain for i and j is not checked. */
2301 if (dump_file && (dump_flags & TDF_DETAILS))
2302 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2303 *overlaps_a = conflict_fn_not_known ();
2304 *overlaps_b = conflict_fn_not_known ();
2305 *last_conflicts = chrec_dont_know;
2306 }
2307 }
2308 else
2309 {
2310 if (dump_file && (dump_flags & TDF_DETAILS))
2311 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2312 *overlaps_a = conflict_fn_not_known ();
2313 *overlaps_b = conflict_fn_not_known ();
2314 *last_conflicts = chrec_dont_know;
2315 }
2316 }
2317 else
2318 {
2319 if (dump_file && (dump_flags & TDF_DETAILS))
2320 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2321 *overlaps_a = conflict_fn_not_known ();
2322 *overlaps_b = conflict_fn_not_known ();
2323 *last_conflicts = chrec_dont_know;
2324 }
2325
2326 end_analyze_subs_aa:
2327 obstack_free (&scratch_obstack, NULL);
2328 if (dump_file && (dump_flags & TDF_DETAILS))
2329 {
2330 fprintf (dump_file, " (overlaps_a = ");
2331 dump_conflict_function (dump_file, *overlaps_a);
2332 fprintf (dump_file, ")\n (overlaps_b = ");
2333 dump_conflict_function (dump_file, *overlaps_b);
2334 fprintf (dump_file, ")\n");
2335 fprintf (dump_file, ")\n");
2336 }
2337 }
2338
2339 /* Returns true when analyze_subscript_affine_affine can be used for
2340 determining the dependence relation between chrec_a and chrec_b,
2341 that contain symbols. This function modifies chrec_a and chrec_b
2342 such that the analysis result is the same, and such that they don't
2343 contain symbols, and then can safely be passed to the analyzer.
2344
2345 Example: The analysis of the following tuples of evolutions produce
2346 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2347 vs. {0, +, 1}_1
2348
2349 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2350 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2351 */
2352
2353 static bool
2354 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2355 {
2356 tree diff, type, left_a, left_b, right_b;
2357
2358 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2359 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2360 /* FIXME: For the moment not handled. Might be refined later. */
2361 return false;
2362
2363 type = chrec_type (*chrec_a);
2364 left_a = CHREC_LEFT (*chrec_a);
2365 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2366 diff = chrec_fold_minus (type, left_a, left_b);
2367
2368 if (!evolution_function_is_constant_p (diff))
2369 return false;
2370
2371 if (dump_file && (dump_flags & TDF_DETAILS))
2372 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2373
2374 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2375 diff, CHREC_RIGHT (*chrec_a));
2376 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2377 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2378 build_int_cst (type, 0),
2379 right_b);
2380 return true;
2381 }
2382
2383 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2384 *OVERLAPS_B are initialized to the functions that describe the
2385 relation between the elements accessed twice by CHREC_A and
2386 CHREC_B. For k >= 0, the following property is verified:
2387
2388 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2389
2390 static void
2391 analyze_siv_subscript (tree chrec_a,
2392 tree chrec_b,
2393 conflict_function **overlaps_a,
2394 conflict_function **overlaps_b,
2395 tree *last_conflicts,
2396 int loop_nest_num)
2397 {
2398 dependence_stats.num_siv++;
2399
2400 if (dump_file && (dump_flags & TDF_DETAILS))
2401 fprintf (dump_file, "(analyze_siv_subscript \n");
2402
2403 if (evolution_function_is_constant_p (chrec_a)
2404 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2405 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2406 overlaps_a, overlaps_b, last_conflicts);
2407
2408 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2409 && evolution_function_is_constant_p (chrec_b))
2410 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2411 overlaps_b, overlaps_a, last_conflicts);
2412
2413 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2414 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2415 {
2416 if (!chrec_contains_symbols (chrec_a)
2417 && !chrec_contains_symbols (chrec_b))
2418 {
2419 analyze_subscript_affine_affine (chrec_a, chrec_b,
2420 overlaps_a, overlaps_b,
2421 last_conflicts);
2422
2423 if (CF_NOT_KNOWN_P (*overlaps_a)
2424 || CF_NOT_KNOWN_P (*overlaps_b))
2425 dependence_stats.num_siv_unimplemented++;
2426 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2427 || CF_NO_DEPENDENCE_P (*overlaps_b))
2428 dependence_stats.num_siv_independent++;
2429 else
2430 dependence_stats.num_siv_dependent++;
2431 }
2432 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2433 &chrec_b))
2434 {
2435 analyze_subscript_affine_affine (chrec_a, chrec_b,
2436 overlaps_a, overlaps_b,
2437 last_conflicts);
2438
2439 if (CF_NOT_KNOWN_P (*overlaps_a)
2440 || CF_NOT_KNOWN_P (*overlaps_b))
2441 dependence_stats.num_siv_unimplemented++;
2442 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2443 || CF_NO_DEPENDENCE_P (*overlaps_b))
2444 dependence_stats.num_siv_independent++;
2445 else
2446 dependence_stats.num_siv_dependent++;
2447 }
2448 else
2449 goto siv_subscript_dontknow;
2450 }
2451
2452 else
2453 {
2454 siv_subscript_dontknow:;
2455 if (dump_file && (dump_flags & TDF_DETAILS))
2456 fprintf (dump_file, "siv test failed: unimplemented.\n");
2457 *overlaps_a = conflict_fn_not_known ();
2458 *overlaps_b = conflict_fn_not_known ();
2459 *last_conflicts = chrec_dont_know;
2460 dependence_stats.num_siv_unimplemented++;
2461 }
2462
2463 if (dump_file && (dump_flags & TDF_DETAILS))
2464 fprintf (dump_file, ")\n");
2465 }
2466
2467 /* Returns false if we can prove that the greatest common divisor of the steps
2468 of CHREC does not divide CST, false otherwise. */
2469
2470 static bool
2471 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2472 {
2473 HOST_WIDE_INT cd = 0, val;
2474 tree step;
2475
2476 if (!host_integerp (cst, 0))
2477 return true;
2478 val = tree_low_cst (cst, 0);
2479
2480 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2481 {
2482 step = CHREC_RIGHT (chrec);
2483 if (!host_integerp (step, 0))
2484 return true;
2485 cd = gcd (cd, tree_low_cst (step, 0));
2486 chrec = CHREC_LEFT (chrec);
2487 }
2488
2489 return val % cd == 0;
2490 }
2491
2492 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2493 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2494 functions that describe the relation between the elements accessed
2495 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2496 is verified:
2497
2498 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2499
2500 static void
2501 analyze_miv_subscript (tree chrec_a,
2502 tree chrec_b,
2503 conflict_function **overlaps_a,
2504 conflict_function **overlaps_b,
2505 tree *last_conflicts,
2506 struct loop *loop_nest)
2507 {
2508 /* FIXME: This is a MIV subscript, not yet handled.
2509 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2510 (A[i] vs. A[j]).
2511
2512 In the SIV test we had to solve a Diophantine equation with two
2513 variables. In the MIV case we have to solve a Diophantine
2514 equation with 2*n variables (if the subscript uses n IVs).
2515 */
2516 tree type, difference;
2517
2518 dependence_stats.num_miv++;
2519 if (dump_file && (dump_flags & TDF_DETAILS))
2520 fprintf (dump_file, "(analyze_miv_subscript \n");
2521
2522 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2523 chrec_a = chrec_convert (type, chrec_a, NULL);
2524 chrec_b = chrec_convert (type, chrec_b, NULL);
2525 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2526
2527 if (eq_evolutions_p (chrec_a, chrec_b))
2528 {
2529 /* Access functions are the same: all the elements are accessed
2530 in the same order. */
2531 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2532 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2533 *last_conflicts = estimated_loop_iterations_tree
2534 (get_chrec_loop (chrec_a), true);
2535 dependence_stats.num_miv_dependent++;
2536 }
2537
2538 else if (evolution_function_is_constant_p (difference)
2539 /* For the moment, the following is verified:
2540 evolution_function_is_affine_multivariate_p (chrec_a,
2541 loop_nest->num) */
2542 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2543 {
2544 /* testsuite/.../ssa-chrec-33.c
2545 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2546
2547 The difference is 1, and all the evolution steps are multiples
2548 of 2, consequently there are no overlapping elements. */
2549 *overlaps_a = conflict_fn_no_dependence ();
2550 *overlaps_b = conflict_fn_no_dependence ();
2551 *last_conflicts = integer_zero_node;
2552 dependence_stats.num_miv_independent++;
2553 }
2554
2555 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2556 && !chrec_contains_symbols (chrec_a)
2557 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2558 && !chrec_contains_symbols (chrec_b))
2559 {
2560 /* testsuite/.../ssa-chrec-35.c
2561 {0, +, 1}_2 vs. {0, +, 1}_3
2562 the overlapping elements are respectively located at iterations:
2563 {0, +, 1}_x and {0, +, 1}_x,
2564 in other words, we have the equality:
2565 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2566
2567 Other examples:
2568 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2569 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2570
2571 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2572 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2573 */
2574 analyze_subscript_affine_affine (chrec_a, chrec_b,
2575 overlaps_a, overlaps_b, last_conflicts);
2576
2577 if (CF_NOT_KNOWN_P (*overlaps_a)
2578 || CF_NOT_KNOWN_P (*overlaps_b))
2579 dependence_stats.num_miv_unimplemented++;
2580 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2581 || CF_NO_DEPENDENCE_P (*overlaps_b))
2582 dependence_stats.num_miv_independent++;
2583 else
2584 dependence_stats.num_miv_dependent++;
2585 }
2586
2587 else
2588 {
2589 /* When the analysis is too difficult, answer "don't know". */
2590 if (dump_file && (dump_flags & TDF_DETAILS))
2591 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2592
2593 *overlaps_a = conflict_fn_not_known ();
2594 *overlaps_b = conflict_fn_not_known ();
2595 *last_conflicts = chrec_dont_know;
2596 dependence_stats.num_miv_unimplemented++;
2597 }
2598
2599 if (dump_file && (dump_flags & TDF_DETAILS))
2600 fprintf (dump_file, ")\n");
2601 }
2602
2603 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2604 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2605 OVERLAP_ITERATIONS_B are initialized with two functions that
2606 describe the iterations that contain conflicting elements.
2607
2608 Remark: For an integer k >= 0, the following equality is true:
2609
2610 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2611 */
2612
2613 static void
2614 analyze_overlapping_iterations (tree chrec_a,
2615 tree chrec_b,
2616 conflict_function **overlap_iterations_a,
2617 conflict_function **overlap_iterations_b,
2618 tree *last_conflicts, struct loop *loop_nest)
2619 {
2620 unsigned int lnn = loop_nest->num;
2621
2622 dependence_stats.num_subscript_tests++;
2623
2624 if (dump_file && (dump_flags & TDF_DETAILS))
2625 {
2626 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2627 fprintf (dump_file, " (chrec_a = ");
2628 print_generic_expr (dump_file, chrec_a, 0);
2629 fprintf (dump_file, ")\n (chrec_b = ");
2630 print_generic_expr (dump_file, chrec_b, 0);
2631 fprintf (dump_file, ")\n");
2632 }
2633
2634 if (chrec_a == NULL_TREE
2635 || chrec_b == NULL_TREE
2636 || chrec_contains_undetermined (chrec_a)
2637 || chrec_contains_undetermined (chrec_b))
2638 {
2639 dependence_stats.num_subscript_undetermined++;
2640
2641 *overlap_iterations_a = conflict_fn_not_known ();
2642 *overlap_iterations_b = conflict_fn_not_known ();
2643 }
2644
2645 /* If they are the same chrec, and are affine, they overlap
2646 on every iteration. */
2647 else if (eq_evolutions_p (chrec_a, chrec_b)
2648 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2649 || operand_equal_p (chrec_a, chrec_b, 0)))
2650 {
2651 dependence_stats.num_same_subscript_function++;
2652 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2653 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2654 *last_conflicts = chrec_dont_know;
2655 }
2656
2657 /* If they aren't the same, and aren't affine, we can't do anything
2658 yet. */
2659 else if ((chrec_contains_symbols (chrec_a)
2660 || chrec_contains_symbols (chrec_b))
2661 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2662 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2663 {
2664 dependence_stats.num_subscript_undetermined++;
2665 *overlap_iterations_a = conflict_fn_not_known ();
2666 *overlap_iterations_b = conflict_fn_not_known ();
2667 }
2668
2669 else if (ziv_subscript_p (chrec_a, chrec_b))
2670 analyze_ziv_subscript (chrec_a, chrec_b,
2671 overlap_iterations_a, overlap_iterations_b,
2672 last_conflicts);
2673
2674 else if (siv_subscript_p (chrec_a, chrec_b))
2675 analyze_siv_subscript (chrec_a, chrec_b,
2676 overlap_iterations_a, overlap_iterations_b,
2677 last_conflicts, lnn);
2678
2679 else
2680 analyze_miv_subscript (chrec_a, chrec_b,
2681 overlap_iterations_a, overlap_iterations_b,
2682 last_conflicts, loop_nest);
2683
2684 if (dump_file && (dump_flags & TDF_DETAILS))
2685 {
2686 fprintf (dump_file, " (overlap_iterations_a = ");
2687 dump_conflict_function (dump_file, *overlap_iterations_a);
2688 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2689 dump_conflict_function (dump_file, *overlap_iterations_b);
2690 fprintf (dump_file, ")\n");
2691 fprintf (dump_file, ")\n");
2692 }
2693 }
2694
2695 /* Helper function for uniquely inserting distance vectors. */
2696
2697 static void
2698 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2699 {
2700 unsigned i;
2701 lambda_vector v;
2702
2703 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2704 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2705 return;
2706
2707 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2708 }
2709
2710 /* Helper function for uniquely inserting direction vectors. */
2711
2712 static void
2713 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2714 {
2715 unsigned i;
2716 lambda_vector v;
2717
2718 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2719 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2720 return;
2721
2722 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2723 }
2724
2725 /* Add a distance of 1 on all the loops outer than INDEX. If we
2726 haven't yet determined a distance for this outer loop, push a new
2727 distance vector composed of the previous distance, and a distance
2728 of 1 for this outer loop. Example:
2729
2730 | loop_1
2731 | loop_2
2732 | A[10]
2733 | endloop_2
2734 | endloop_1
2735
2736 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2737 save (0, 1), then we have to save (1, 0). */
2738
2739 static void
2740 add_outer_distances (struct data_dependence_relation *ddr,
2741 lambda_vector dist_v, int index)
2742 {
2743 /* For each outer loop where init_v is not set, the accesses are
2744 in dependence of distance 1 in the loop. */
2745 while (--index >= 0)
2746 {
2747 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2748 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2749 save_v[index] = 1;
2750 save_dist_v (ddr, save_v);
2751 }
2752 }
2753
2754 /* Return false when fail to represent the data dependence as a
2755 distance vector. INIT_B is set to true when a component has been
2756 added to the distance vector DIST_V. INDEX_CARRY is then set to
2757 the index in DIST_V that carries the dependence. */
2758
2759 static bool
2760 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2761 struct data_reference *ddr_a,
2762 struct data_reference *ddr_b,
2763 lambda_vector dist_v, bool *init_b,
2764 int *index_carry)
2765 {
2766 unsigned i;
2767 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2768
2769 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2770 {
2771 tree access_fn_a, access_fn_b;
2772 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2773
2774 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2775 {
2776 non_affine_dependence_relation (ddr);
2777 return false;
2778 }
2779
2780 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2781 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2782
2783 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2784 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2785 {
2786 int dist, index;
2787 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2788 DDR_LOOP_NEST (ddr));
2789 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2790 DDR_LOOP_NEST (ddr));
2791
2792 /* The dependence is carried by the outermost loop. Example:
2793 | loop_1
2794 | A[{4, +, 1}_1]
2795 | loop_2
2796 | A[{5, +, 1}_2]
2797 | endloop_2
2798 | endloop_1
2799 In this case, the dependence is carried by loop_1. */
2800 index = index_a < index_b ? index_a : index_b;
2801 *index_carry = MIN (index, *index_carry);
2802
2803 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2804 {
2805 non_affine_dependence_relation (ddr);
2806 return false;
2807 }
2808
2809 dist = int_cst_value (SUB_DISTANCE (subscript));
2810
2811 /* This is the subscript coupling test. If we have already
2812 recorded a distance for this loop (a distance coming from
2813 another subscript), it should be the same. For example,
2814 in the following code, there is no dependence:
2815
2816 | loop i = 0, N, 1
2817 | T[i+1][i] = ...
2818 | ... = T[i][i]
2819 | endloop
2820 */
2821 if (init_v[index] != 0 && dist_v[index] != dist)
2822 {
2823 finalize_ddr_dependent (ddr, chrec_known);
2824 return false;
2825 }
2826
2827 dist_v[index] = dist;
2828 init_v[index] = 1;
2829 *init_b = true;
2830 }
2831 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2832 {
2833 /* This can be for example an affine vs. constant dependence
2834 (T[i] vs. T[3]) that is not an affine dependence and is
2835 not representable as a distance vector. */
2836 non_affine_dependence_relation (ddr);
2837 return false;
2838 }
2839 }
2840
2841 return true;
2842 }
2843
2844 /* Return true when the DDR contains only constant access functions. */
2845
2846 static bool
2847 constant_access_functions (const struct data_dependence_relation *ddr)
2848 {
2849 unsigned i;
2850
2851 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2852 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2853 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2854 return false;
2855
2856 return true;
2857 }
2858
2859 /* Helper function for the case where DDR_A and DDR_B are the same
2860 multivariate access function with a constant step. For an example
2861 see pr34635-1.c. */
2862
2863 static void
2864 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2865 {
2866 int x_1, x_2;
2867 tree c_1 = CHREC_LEFT (c_2);
2868 tree c_0 = CHREC_LEFT (c_1);
2869 lambda_vector dist_v;
2870 int v1, v2, cd;
2871
2872 /* Polynomials with more than 2 variables are not handled yet. When
2873 the evolution steps are parameters, it is not possible to
2874 represent the dependence using classical distance vectors. */
2875 if (TREE_CODE (c_0) != INTEGER_CST
2876 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2877 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2878 {
2879 DDR_AFFINE_P (ddr) = false;
2880 return;
2881 }
2882
2883 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2884 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2885
2886 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2887 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2888 v1 = int_cst_value (CHREC_RIGHT (c_1));
2889 v2 = int_cst_value (CHREC_RIGHT (c_2));
2890 cd = gcd (v1, v2);
2891 v1 /= cd;
2892 v2 /= cd;
2893
2894 if (v2 < 0)
2895 {
2896 v2 = -v2;
2897 v1 = -v1;
2898 }
2899
2900 dist_v[x_1] = v2;
2901 dist_v[x_2] = -v1;
2902 save_dist_v (ddr, dist_v);
2903
2904 add_outer_distances (ddr, dist_v, x_1);
2905 }
2906
2907 /* Helper function for the case where DDR_A and DDR_B are the same
2908 access functions. */
2909
2910 static void
2911 add_other_self_distances (struct data_dependence_relation *ddr)
2912 {
2913 lambda_vector dist_v;
2914 unsigned i;
2915 int index_carry = DDR_NB_LOOPS (ddr);
2916
2917 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2918 {
2919 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2920
2921 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2922 {
2923 if (!evolution_function_is_univariate_p (access_fun))
2924 {
2925 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2926 {
2927 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2928 return;
2929 }
2930
2931 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
2932
2933 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
2934 add_multivariate_self_dist (ddr, access_fun);
2935 else
2936 /* The evolution step is not constant: it varies in
2937 the outer loop, so this cannot be represented by a
2938 distance vector. For example in pr34635.c the
2939 evolution is {0, +, {0, +, 4}_1}_2. */
2940 DDR_AFFINE_P (ddr) = false;
2941
2942 return;
2943 }
2944
2945 index_carry = MIN (index_carry,
2946 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2947 DDR_LOOP_NEST (ddr)));
2948 }
2949 }
2950
2951 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2952 add_outer_distances (ddr, dist_v, index_carry);
2953 }
2954
2955 static void
2956 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2957 {
2958 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2959
2960 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2961 save_dist_v (ddr, dist_v);
2962 }
2963
2964 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2965 is the case for example when access functions are the same and
2966 equal to a constant, as in:
2967
2968 | loop_1
2969 | A[3] = ...
2970 | ... = A[3]
2971 | endloop_1
2972
2973 in which case the distance vectors are (0) and (1). */
2974
2975 static void
2976 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2977 {
2978 unsigned i, j;
2979
2980 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2981 {
2982 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2983 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2984 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2985
2986 for (j = 0; j < ca->n; j++)
2987 if (affine_function_zero_p (ca->fns[j]))
2988 {
2989 insert_innermost_unit_dist_vector (ddr);
2990 return;
2991 }
2992
2993 for (j = 0; j < cb->n; j++)
2994 if (affine_function_zero_p (cb->fns[j]))
2995 {
2996 insert_innermost_unit_dist_vector (ddr);
2997 return;
2998 }
2999 }
3000 }
3001
3002 /* Compute the classic per loop distance vector. DDR is the data
3003 dependence relation to build a vector from. Return false when fail
3004 to represent the data dependence as a distance vector. */
3005
3006 static bool
3007 build_classic_dist_vector (struct data_dependence_relation *ddr,
3008 struct loop *loop_nest)
3009 {
3010 bool init_b = false;
3011 int index_carry = DDR_NB_LOOPS (ddr);
3012 lambda_vector dist_v;
3013
3014 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3015 return false;
3016
3017 if (same_access_functions (ddr))
3018 {
3019 /* Save the 0 vector. */
3020 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3021 save_dist_v (ddr, dist_v);
3022
3023 if (constant_access_functions (ddr))
3024 add_distance_for_zero_overlaps (ddr);
3025
3026 if (DDR_NB_LOOPS (ddr) > 1)
3027 add_other_self_distances (ddr);
3028
3029 return true;
3030 }
3031
3032 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3033 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3034 dist_v, &init_b, &index_carry))
3035 return false;
3036
3037 /* Save the distance vector if we initialized one. */
3038 if (init_b)
3039 {
3040 /* Verify a basic constraint: classic distance vectors should
3041 always be lexicographically positive.
3042
3043 Data references are collected in the order of execution of
3044 the program, thus for the following loop
3045
3046 | for (i = 1; i < 100; i++)
3047 | for (j = 1; j < 100; j++)
3048 | {
3049 | t = T[j+1][i-1]; // A
3050 | T[j][i] = t + 2; // B
3051 | }
3052
3053 references are collected following the direction of the wind:
3054 A then B. The data dependence tests are performed also
3055 following this order, such that we're looking at the distance
3056 separating the elements accessed by A from the elements later
3057 accessed by B. But in this example, the distance returned by
3058 test_dep (A, B) is lexicographically negative (-1, 1), that
3059 means that the access A occurs later than B with respect to
3060 the outer loop, ie. we're actually looking upwind. In this
3061 case we solve test_dep (B, A) looking downwind to the
3062 lexicographically positive solution, that returns the
3063 distance vector (1, -1). */
3064 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3065 {
3066 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3067 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3068 loop_nest))
3069 return false;
3070 compute_subscript_distance (ddr);
3071 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3072 save_v, &init_b, &index_carry))
3073 return false;
3074 save_dist_v (ddr, save_v);
3075 DDR_REVERSED_P (ddr) = true;
3076
3077 /* In this case there is a dependence forward for all the
3078 outer loops:
3079
3080 | for (k = 1; k < 100; k++)
3081 | for (i = 1; i < 100; i++)
3082 | for (j = 1; j < 100; j++)
3083 | {
3084 | t = T[j+1][i-1]; // A
3085 | T[j][i] = t + 2; // B
3086 | }
3087
3088 the vectors are:
3089 (0, 1, -1)
3090 (1, 1, -1)
3091 (1, -1, 1)
3092 */
3093 if (DDR_NB_LOOPS (ddr) > 1)
3094 {
3095 add_outer_distances (ddr, save_v, index_carry);
3096 add_outer_distances (ddr, dist_v, index_carry);
3097 }
3098 }
3099 else
3100 {
3101 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3102 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3103
3104 if (DDR_NB_LOOPS (ddr) > 1)
3105 {
3106 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3107
3108 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3109 DDR_A (ddr), loop_nest))
3110 return false;
3111 compute_subscript_distance (ddr);
3112 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3113 opposite_v, &init_b,
3114 &index_carry))
3115 return false;
3116
3117 save_dist_v (ddr, save_v);
3118 add_outer_distances (ddr, dist_v, index_carry);
3119 add_outer_distances (ddr, opposite_v, index_carry);
3120 }
3121 else
3122 save_dist_v (ddr, save_v);
3123 }
3124 }
3125 else
3126 {
3127 /* There is a distance of 1 on all the outer loops: Example:
3128 there is a dependence of distance 1 on loop_1 for the array A.
3129
3130 | loop_1
3131 | A[5] = ...
3132 | endloop
3133 */
3134 add_outer_distances (ddr, dist_v,
3135 lambda_vector_first_nz (dist_v,
3136 DDR_NB_LOOPS (ddr), 0));
3137 }
3138
3139 if (dump_file && (dump_flags & TDF_DETAILS))
3140 {
3141 unsigned i;
3142
3143 fprintf (dump_file, "(build_classic_dist_vector\n");
3144 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3145 {
3146 fprintf (dump_file, " dist_vector = (");
3147 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3148 DDR_NB_LOOPS (ddr));
3149 fprintf (dump_file, " )\n");
3150 }
3151 fprintf (dump_file, ")\n");
3152 }
3153
3154 return true;
3155 }
3156
3157 /* Return the direction for a given distance.
3158 FIXME: Computing dir this way is suboptimal, since dir can catch
3159 cases that dist is unable to represent. */
3160
3161 static inline enum data_dependence_direction
3162 dir_from_dist (int dist)
3163 {
3164 if (dist > 0)
3165 return dir_positive;
3166 else if (dist < 0)
3167 return dir_negative;
3168 else
3169 return dir_equal;
3170 }
3171
3172 /* Compute the classic per loop direction vector. DDR is the data
3173 dependence relation to build a vector from. */
3174
3175 static void
3176 build_classic_dir_vector (struct data_dependence_relation *ddr)
3177 {
3178 unsigned i, j;
3179 lambda_vector dist_v;
3180
3181 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3182 {
3183 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3184
3185 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3186 dir_v[j] = dir_from_dist (dist_v[j]);
3187
3188 save_dir_v (ddr, dir_v);
3189 }
3190 }
3191
3192 /* Helper function. Returns true when there is a dependence between
3193 data references DRA and DRB. */
3194
3195 static bool
3196 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3197 struct data_reference *dra,
3198 struct data_reference *drb,
3199 struct loop *loop_nest)
3200 {
3201 unsigned int i;
3202 tree last_conflicts;
3203 struct subscript *subscript;
3204
3205 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3206 i++)
3207 {
3208 conflict_function *overlaps_a, *overlaps_b;
3209
3210 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3211 DR_ACCESS_FN (drb, i),
3212 &overlaps_a, &overlaps_b,
3213 &last_conflicts, loop_nest);
3214
3215 if (CF_NOT_KNOWN_P (overlaps_a)
3216 || CF_NOT_KNOWN_P (overlaps_b))
3217 {
3218 finalize_ddr_dependent (ddr, chrec_dont_know);
3219 dependence_stats.num_dependence_undetermined++;
3220 free_conflict_function (overlaps_a);
3221 free_conflict_function (overlaps_b);
3222 return false;
3223 }
3224
3225 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3226 || CF_NO_DEPENDENCE_P (overlaps_b))
3227 {
3228 finalize_ddr_dependent (ddr, chrec_known);
3229 dependence_stats.num_dependence_independent++;
3230 free_conflict_function (overlaps_a);
3231 free_conflict_function (overlaps_b);
3232 return false;
3233 }
3234
3235 else
3236 {
3237 if (SUB_CONFLICTS_IN_A (subscript))
3238 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3239 if (SUB_CONFLICTS_IN_B (subscript))
3240 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3241
3242 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3243 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3244 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3245 }
3246 }
3247
3248 return true;
3249 }
3250
3251 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3252
3253 static void
3254 subscript_dependence_tester (struct data_dependence_relation *ddr,
3255 struct loop *loop_nest)
3256 {
3257
3258 if (dump_file && (dump_flags & TDF_DETAILS))
3259 fprintf (dump_file, "(subscript_dependence_tester \n");
3260
3261 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3262 dependence_stats.num_dependence_dependent++;
3263
3264 compute_subscript_distance (ddr);
3265 if (build_classic_dist_vector (ddr, loop_nest))
3266 build_classic_dir_vector (ddr);
3267
3268 if (dump_file && (dump_flags & TDF_DETAILS))
3269 fprintf (dump_file, ")\n");
3270 }
3271
3272 /* Returns true when all the access functions of A are affine or
3273 constant with respect to LOOP_NEST. */
3274
3275 static bool
3276 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3277 const struct loop *loop_nest)
3278 {
3279 unsigned int i;
3280 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3281 tree t;
3282
3283 FOR_EACH_VEC_ELT (tree, fns, i, t)
3284 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3285 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3286 return false;
3287
3288 return true;
3289 }
3290
3291 /* Initializes an equation for an OMEGA problem using the information
3292 contained in the ACCESS_FUN. Returns true when the operation
3293 succeeded.
3294
3295 PB is the omega constraint system.
3296 EQ is the number of the equation to be initialized.
3297 OFFSET is used for shifting the variables names in the constraints:
3298 a constrain is composed of 2 * the number of variables surrounding
3299 dependence accesses. OFFSET is set either to 0 for the first n variables,
3300 then it is set to n.
3301 ACCESS_FUN is expected to be an affine chrec. */
3302
3303 static bool
3304 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3305 unsigned int offset, tree access_fun,
3306 struct data_dependence_relation *ddr)
3307 {
3308 switch (TREE_CODE (access_fun))
3309 {
3310 case POLYNOMIAL_CHREC:
3311 {
3312 tree left = CHREC_LEFT (access_fun);
3313 tree right = CHREC_RIGHT (access_fun);
3314 int var = CHREC_VARIABLE (access_fun);
3315 unsigned var_idx;
3316
3317 if (TREE_CODE (right) != INTEGER_CST)
3318 return false;
3319
3320 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3321 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3322
3323 /* Compute the innermost loop index. */
3324 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3325
3326 if (offset == 0)
3327 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3328 += int_cst_value (right);
3329
3330 switch (TREE_CODE (left))
3331 {
3332 case POLYNOMIAL_CHREC:
3333 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3334
3335 case INTEGER_CST:
3336 pb->eqs[eq].coef[0] += int_cst_value (left);
3337 return true;
3338
3339 default:
3340 return false;
3341 }
3342 }
3343
3344 case INTEGER_CST:
3345 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3346 return true;
3347
3348 default:
3349 return false;
3350 }
3351 }
3352
3353 /* As explained in the comments preceding init_omega_for_ddr, we have
3354 to set up a system for each loop level, setting outer loops
3355 variation to zero, and current loop variation to positive or zero.
3356 Save each lexico positive distance vector. */
3357
3358 static void
3359 omega_extract_distance_vectors (omega_pb pb,
3360 struct data_dependence_relation *ddr)
3361 {
3362 int eq, geq;
3363 unsigned i, j;
3364 struct loop *loopi, *loopj;
3365 enum omega_result res;
3366
3367 /* Set a new problem for each loop in the nest. The basis is the
3368 problem that we have initialized until now. On top of this we
3369 add new constraints. */
3370 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3371 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3372 {
3373 int dist = 0;
3374 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3375 DDR_NB_LOOPS (ddr));
3376
3377 omega_copy_problem (copy, pb);
3378
3379 /* For all the outer loops "loop_j", add "dj = 0". */
3380 for (j = 0;
3381 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3382 {
3383 eq = omega_add_zero_eq (copy, omega_black);
3384 copy->eqs[eq].coef[j + 1] = 1;
3385 }
3386
3387 /* For "loop_i", add "0 <= di". */
3388 geq = omega_add_zero_geq (copy, omega_black);
3389 copy->geqs[geq].coef[i + 1] = 1;
3390
3391 /* Reduce the constraint system, and test that the current
3392 problem is feasible. */
3393 res = omega_simplify_problem (copy);
3394 if (res == omega_false
3395 || res == omega_unknown
3396 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3397 goto next_problem;
3398
3399 for (eq = 0; eq < copy->num_subs; eq++)
3400 if (copy->subs[eq].key == (int) i + 1)
3401 {
3402 dist = copy->subs[eq].coef[0];
3403 goto found_dist;
3404 }
3405
3406 if (dist == 0)
3407 {
3408 /* Reinitialize problem... */
3409 omega_copy_problem (copy, pb);
3410 for (j = 0;
3411 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3412 {
3413 eq = omega_add_zero_eq (copy, omega_black);
3414 copy->eqs[eq].coef[j + 1] = 1;
3415 }
3416
3417 /* ..., but this time "di = 1". */
3418 eq = omega_add_zero_eq (copy, omega_black);
3419 copy->eqs[eq].coef[i + 1] = 1;
3420 copy->eqs[eq].coef[0] = -1;
3421
3422 res = omega_simplify_problem (copy);
3423 if (res == omega_false
3424 || res == omega_unknown
3425 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3426 goto next_problem;
3427
3428 for (eq = 0; eq < copy->num_subs; eq++)
3429 if (copy->subs[eq].key == (int) i + 1)
3430 {
3431 dist = copy->subs[eq].coef[0];
3432 goto found_dist;
3433 }
3434 }
3435
3436 found_dist:;
3437 /* Save the lexicographically positive distance vector. */
3438 if (dist >= 0)
3439 {
3440 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3441 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3442
3443 dist_v[i] = dist;
3444
3445 for (eq = 0; eq < copy->num_subs; eq++)
3446 if (copy->subs[eq].key > 0)
3447 {
3448 dist = copy->subs[eq].coef[0];
3449 dist_v[copy->subs[eq].key - 1] = dist;
3450 }
3451
3452 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3453 dir_v[j] = dir_from_dist (dist_v[j]);
3454
3455 save_dist_v (ddr, dist_v);
3456 save_dir_v (ddr, dir_v);
3457 }
3458
3459 next_problem:;
3460 omega_free_problem (copy);
3461 }
3462 }
3463
3464 /* This is called for each subscript of a tuple of data references:
3465 insert an equality for representing the conflicts. */
3466
3467 static bool
3468 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3469 struct data_dependence_relation *ddr,
3470 omega_pb pb, bool *maybe_dependent)
3471 {
3472 int eq;
3473 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3474 TREE_TYPE (access_fun_b));
3475 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3476 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3477 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3478 tree minus_one;
3479
3480 /* When the fun_a - fun_b is not constant, the dependence is not
3481 captured by the classic distance vector representation. */
3482 if (TREE_CODE (difference) != INTEGER_CST)
3483 return false;
3484
3485 /* ZIV test. */
3486 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3487 {
3488 /* There is no dependence. */
3489 *maybe_dependent = false;
3490 return true;
3491 }
3492
3493 minus_one = build_int_cst (type, -1);
3494 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3495
3496 eq = omega_add_zero_eq (pb, omega_black);
3497 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3498 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3499 /* There is probably a dependence, but the system of
3500 constraints cannot be built: answer "don't know". */
3501 return false;
3502
3503 /* GCD test. */
3504 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3505 && !int_divides_p (lambda_vector_gcd
3506 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3507 2 * DDR_NB_LOOPS (ddr)),
3508 pb->eqs[eq].coef[0]))
3509 {
3510 /* There is no dependence. */
3511 *maybe_dependent = false;
3512 return true;
3513 }
3514
3515 return true;
3516 }
3517
3518 /* Helper function, same as init_omega_for_ddr but specialized for
3519 data references A and B. */
3520
3521 static bool
3522 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3523 struct data_dependence_relation *ddr,
3524 omega_pb pb, bool *maybe_dependent)
3525 {
3526 unsigned i;
3527 int ineq;
3528 struct loop *loopi;
3529 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3530
3531 /* Insert an equality per subscript. */
3532 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3533 {
3534 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3535 ddr, pb, maybe_dependent))
3536 return false;
3537 else if (*maybe_dependent == false)
3538 {
3539 /* There is no dependence. */
3540 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3541 return true;
3542 }
3543 }
3544
3545 /* Insert inequalities: constraints corresponding to the iteration
3546 domain, i.e. the loops surrounding the references "loop_x" and
3547 the distance variables "dx". The layout of the OMEGA
3548 representation is as follows:
3549 - coef[0] is the constant
3550 - coef[1..nb_loops] are the protected variables that will not be
3551 removed by the solver: the "dx"
3552 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3553 */
3554 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3555 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3556 {
3557 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3558
3559 /* 0 <= loop_x */
3560 ineq = omega_add_zero_geq (pb, omega_black);
3561 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3562
3563 /* 0 <= loop_x + dx */
3564 ineq = omega_add_zero_geq (pb, omega_black);
3565 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3566 pb->geqs[ineq].coef[i + 1] = 1;
3567
3568 if (nbi != -1)
3569 {
3570 /* loop_x <= nb_iters */
3571 ineq = omega_add_zero_geq (pb, omega_black);
3572 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3573 pb->geqs[ineq].coef[0] = nbi;
3574
3575 /* loop_x + dx <= nb_iters */
3576 ineq = omega_add_zero_geq (pb, omega_black);
3577 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3578 pb->geqs[ineq].coef[i + 1] = -1;
3579 pb->geqs[ineq].coef[0] = nbi;
3580
3581 /* A step "dx" bigger than nb_iters is not feasible, so
3582 add "0 <= nb_iters + dx", */
3583 ineq = omega_add_zero_geq (pb, omega_black);
3584 pb->geqs[ineq].coef[i + 1] = 1;
3585 pb->geqs[ineq].coef[0] = nbi;
3586 /* and "dx <= nb_iters". */
3587 ineq = omega_add_zero_geq (pb, omega_black);
3588 pb->geqs[ineq].coef[i + 1] = -1;
3589 pb->geqs[ineq].coef[0] = nbi;
3590 }
3591 }
3592
3593 omega_extract_distance_vectors (pb, ddr);
3594
3595 return true;
3596 }
3597
3598 /* Sets up the Omega dependence problem for the data dependence
3599 relation DDR. Returns false when the constraint system cannot be
3600 built, ie. when the test answers "don't know". Returns true
3601 otherwise, and when independence has been proved (using one of the
3602 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3603 set MAYBE_DEPENDENT to true.
3604
3605 Example: for setting up the dependence system corresponding to the
3606 conflicting accesses
3607
3608 | loop_i
3609 | loop_j
3610 | A[i, i+1] = ...
3611 | ... A[2*j, 2*(i + j)]
3612 | endloop_j
3613 | endloop_i
3614
3615 the following constraints come from the iteration domain:
3616
3617 0 <= i <= Ni
3618 0 <= i + di <= Ni
3619 0 <= j <= Nj
3620 0 <= j + dj <= Nj
3621
3622 where di, dj are the distance variables. The constraints
3623 representing the conflicting elements are:
3624
3625 i = 2 * (j + dj)
3626 i + 1 = 2 * (i + di + j + dj)
3627
3628 For asking that the resulting distance vector (di, dj) be
3629 lexicographically positive, we insert the constraint "di >= 0". If
3630 "di = 0" in the solution, we fix that component to zero, and we
3631 look at the inner loops: we set a new problem where all the outer
3632 loop distances are zero, and fix this inner component to be
3633 positive. When one of the components is positive, we save that
3634 distance, and set a new problem where the distance on this loop is
3635 zero, searching for other distances in the inner loops. Here is
3636 the classic example that illustrates that we have to set for each
3637 inner loop a new problem:
3638
3639 | loop_1
3640 | loop_2
3641 | A[10]
3642 | endloop_2
3643 | endloop_1
3644
3645 we have to save two distances (1, 0) and (0, 1).
3646
3647 Given two array references, refA and refB, we have to set the
3648 dependence problem twice, refA vs. refB and refB vs. refA, and we
3649 cannot do a single test, as refB might occur before refA in the
3650 inner loops, and the contrary when considering outer loops: ex.
3651
3652 | loop_0
3653 | loop_1
3654 | loop_2
3655 | T[{1,+,1}_2][{1,+,1}_1] // refA
3656 | T[{2,+,1}_2][{0,+,1}_1] // refB
3657 | endloop_2
3658 | endloop_1
3659 | endloop_0
3660
3661 refB touches the elements in T before refA, and thus for the same
3662 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3663 but for successive loop_0 iterations, we have (1, -1, 1)
3664
3665 The Omega solver expects the distance variables ("di" in the
3666 previous example) to come first in the constraint system (as
3667 variables to be protected, or "safe" variables), the constraint
3668 system is built using the following layout:
3669
3670 "cst | distance vars | index vars".
3671 */
3672
3673 static bool
3674 init_omega_for_ddr (struct data_dependence_relation *ddr,
3675 bool *maybe_dependent)
3676 {
3677 omega_pb pb;
3678 bool res = false;
3679
3680 *maybe_dependent = true;
3681
3682 if (same_access_functions (ddr))
3683 {
3684 unsigned j;
3685 lambda_vector dir_v;
3686
3687 /* Save the 0 vector. */
3688 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3689 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3690 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3691 dir_v[j] = dir_equal;
3692 save_dir_v (ddr, dir_v);
3693
3694 /* Save the dependences carried by outer loops. */
3695 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3696 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3697 maybe_dependent);
3698 omega_free_problem (pb);
3699 return res;
3700 }
3701
3702 /* Omega expects the protected variables (those that have to be kept
3703 after elimination) to appear first in the constraint system.
3704 These variables are the distance variables. In the following
3705 initialization we declare NB_LOOPS safe variables, and the total
3706 number of variables for the constraint system is 2*NB_LOOPS. */
3707 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3708 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3709 maybe_dependent);
3710 omega_free_problem (pb);
3711
3712 /* Stop computation if not decidable, or no dependence. */
3713 if (res == false || *maybe_dependent == false)
3714 return res;
3715
3716 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3717 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3718 maybe_dependent);
3719 omega_free_problem (pb);
3720
3721 return res;
3722 }
3723
3724 /* Return true when DDR contains the same information as that stored
3725 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3726
3727 static bool
3728 ddr_consistent_p (FILE *file,
3729 struct data_dependence_relation *ddr,
3730 VEC (lambda_vector, heap) *dist_vects,
3731 VEC (lambda_vector, heap) *dir_vects)
3732 {
3733 unsigned int i, j;
3734
3735 /* If dump_file is set, output there. */
3736 if (dump_file && (dump_flags & TDF_DETAILS))
3737 file = dump_file;
3738
3739 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3740 {
3741 lambda_vector b_dist_v;
3742 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3743 VEC_length (lambda_vector, dist_vects),
3744 DDR_NUM_DIST_VECTS (ddr));
3745
3746 fprintf (file, "Banerjee dist vectors:\n");
3747 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3748 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3749
3750 fprintf (file, "Omega dist vectors:\n");
3751 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3752 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3753
3754 fprintf (file, "data dependence relation:\n");
3755 dump_data_dependence_relation (file, ddr);
3756
3757 fprintf (file, ")\n");
3758 return false;
3759 }
3760
3761 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3762 {
3763 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3764 VEC_length (lambda_vector, dir_vects),
3765 DDR_NUM_DIR_VECTS (ddr));
3766 return false;
3767 }
3768
3769 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3770 {
3771 lambda_vector a_dist_v;
3772 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3773
3774 /* Distance vectors are not ordered in the same way in the DDR
3775 and in the DIST_VECTS: search for a matching vector. */
3776 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3777 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3778 break;
3779
3780 if (j == VEC_length (lambda_vector, dist_vects))
3781 {
3782 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3783 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3784 fprintf (file, "not found in Omega dist vectors:\n");
3785 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3786 fprintf (file, "data dependence relation:\n");
3787 dump_data_dependence_relation (file, ddr);
3788 fprintf (file, ")\n");
3789 }
3790 }
3791
3792 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3793 {
3794 lambda_vector a_dir_v;
3795 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3796
3797 /* Direction vectors are not ordered in the same way in the DDR
3798 and in the DIR_VECTS: search for a matching vector. */
3799 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3800 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3801 break;
3802
3803 if (j == VEC_length (lambda_vector, dist_vects))
3804 {
3805 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3806 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3807 fprintf (file, "not found in Omega dir vectors:\n");
3808 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3809 fprintf (file, "data dependence relation:\n");
3810 dump_data_dependence_relation (file, ddr);
3811 fprintf (file, ")\n");
3812 }
3813 }
3814
3815 return true;
3816 }
3817
3818 /* This computes the affine dependence relation between A and B with
3819 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3820 independence between two accesses, while CHREC_DONT_KNOW is used
3821 for representing the unknown relation.
3822
3823 Note that it is possible to stop the computation of the dependence
3824 relation the first time we detect a CHREC_KNOWN element for a given
3825 subscript. */
3826
3827 static void
3828 compute_affine_dependence (struct data_dependence_relation *ddr,
3829 struct loop *loop_nest)
3830 {
3831 struct data_reference *dra = DDR_A (ddr);
3832 struct data_reference *drb = DDR_B (ddr);
3833
3834 if (dump_file && (dump_flags & TDF_DETAILS))
3835 {
3836 fprintf (dump_file, "(compute_affine_dependence\n");
3837 fprintf (dump_file, " (stmt_a = \n");
3838 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3839 fprintf (dump_file, ")\n (stmt_b = \n");
3840 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3841 fprintf (dump_file, ")\n");
3842 }
3843
3844 /* Analyze only when the dependence relation is not yet known. */
3845 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3846 && !DDR_SELF_REFERENCE (ddr))
3847 {
3848 dependence_stats.num_dependence_tests++;
3849
3850 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3851 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3852 {
3853 if (flag_check_data_deps)
3854 {
3855 /* Compute the dependences using the first algorithm. */
3856 subscript_dependence_tester (ddr, loop_nest);
3857
3858 if (dump_file && (dump_flags & TDF_DETAILS))
3859 {
3860 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3861 dump_data_dependence_relation (dump_file, ddr);
3862 }
3863
3864 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3865 {
3866 bool maybe_dependent;
3867 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3868
3869 /* Save the result of the first DD analyzer. */
3870 dist_vects = DDR_DIST_VECTS (ddr);
3871 dir_vects = DDR_DIR_VECTS (ddr);
3872
3873 /* Reset the information. */
3874 DDR_DIST_VECTS (ddr) = NULL;
3875 DDR_DIR_VECTS (ddr) = NULL;
3876
3877 /* Compute the same information using Omega. */
3878 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3879 goto csys_dont_know;
3880
3881 if (dump_file && (dump_flags & TDF_DETAILS))
3882 {
3883 fprintf (dump_file, "Omega Analyzer\n");
3884 dump_data_dependence_relation (dump_file, ddr);
3885 }
3886
3887 /* Check that we get the same information. */
3888 if (maybe_dependent)
3889 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3890 dir_vects));
3891 }
3892 }
3893 else
3894 subscript_dependence_tester (ddr, loop_nest);
3895 }
3896
3897 /* As a last case, if the dependence cannot be determined, or if
3898 the dependence is considered too difficult to determine, answer
3899 "don't know". */
3900 else
3901 {
3902 csys_dont_know:;
3903 dependence_stats.num_dependence_undetermined++;
3904
3905 if (dump_file && (dump_flags & TDF_DETAILS))
3906 {
3907 fprintf (dump_file, "Data ref a:\n");
3908 dump_data_reference (dump_file, dra);
3909 fprintf (dump_file, "Data ref b:\n");
3910 dump_data_reference (dump_file, drb);
3911 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3912 }
3913 finalize_ddr_dependent (ddr, chrec_dont_know);
3914 }
3915 }
3916
3917 if (dump_file && (dump_flags & TDF_DETAILS))
3918 fprintf (dump_file, ")\n");
3919 }
3920
3921 /* This computes the dependence relation for the same data
3922 reference into DDR. */
3923
3924 static void
3925 compute_self_dependence (struct data_dependence_relation *ddr)
3926 {
3927 unsigned int i;
3928 struct subscript *subscript;
3929
3930 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3931 return;
3932
3933 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3934 i++)
3935 {
3936 if (SUB_CONFLICTS_IN_A (subscript))
3937 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3938 if (SUB_CONFLICTS_IN_B (subscript))
3939 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3940
3941 /* The accessed index overlaps for each iteration. */
3942 SUB_CONFLICTS_IN_A (subscript)
3943 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3944 SUB_CONFLICTS_IN_B (subscript)
3945 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3946 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3947 }
3948
3949 /* The distance vector is the zero vector. */
3950 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3951 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3952 }
3953
3954 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3955 the data references in DATAREFS, in the LOOP_NEST. When
3956 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3957 relations. */
3958
3959 void
3960 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3961 VEC (ddr_p, heap) **dependence_relations,
3962 VEC (loop_p, heap) *loop_nest,
3963 bool compute_self_and_rr)
3964 {
3965 struct data_dependence_relation *ddr;
3966 struct data_reference *a, *b;
3967 unsigned int i, j;
3968
3969 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
3970 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3971 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
3972 {
3973 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3974 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3975 if (loop_nest)
3976 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
3977 }
3978
3979 if (compute_self_and_rr)
3980 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
3981 {
3982 ddr = initialize_data_dependence_relation (a, a, loop_nest);
3983 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3984 compute_self_dependence (ddr);
3985 }
3986 }
3987
3988 /* Stores the locations of memory references in STMT to REFERENCES. Returns
3989 true if STMT clobbers memory, false otherwise. */
3990
3991 bool
3992 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
3993 {
3994 bool clobbers_memory = false;
3995 data_ref_loc *ref;
3996 tree *op0, *op1;
3997 enum gimple_code stmt_code = gimple_code (stmt);
3998
3999 *references = NULL;
4000
4001 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4002 Calls have side-effects, except those to const or pure
4003 functions. */
4004 if ((stmt_code == GIMPLE_CALL
4005 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4006 || (stmt_code == GIMPLE_ASM
4007 && gimple_asm_volatile_p (stmt)))
4008 clobbers_memory = true;
4009
4010 if (!gimple_vuse (stmt))
4011 return clobbers_memory;
4012
4013 if (stmt_code == GIMPLE_ASSIGN)
4014 {
4015 tree base;
4016 op0 = gimple_assign_lhs_ptr (stmt);
4017 op1 = gimple_assign_rhs1_ptr (stmt);
4018
4019 if (DECL_P (*op1)
4020 || (REFERENCE_CLASS_P (*op1)
4021 && (base = get_base_address (*op1))
4022 && TREE_CODE (base) != SSA_NAME))
4023 {
4024 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4025 ref->pos = op1;
4026 ref->is_read = true;
4027 }
4028
4029 if (DECL_P (*op0)
4030 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4031 {
4032 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4033 ref->pos = op0;
4034 ref->is_read = false;
4035 }
4036 }
4037 else if (stmt_code == GIMPLE_CALL)
4038 {
4039 unsigned i, n = gimple_call_num_args (stmt);
4040
4041 for (i = 0; i < n; i++)
4042 {
4043 op0 = gimple_call_arg_ptr (stmt, i);
4044
4045 if (DECL_P (*op0)
4046 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4047 {
4048 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4049 ref->pos = op0;
4050 ref->is_read = true;
4051 }
4052 }
4053 }
4054
4055 return clobbers_memory;
4056 }
4057
4058 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4059 reference, returns false, otherwise returns true. NEST is the outermost
4060 loop of the loop nest in which the references should be analyzed. */
4061
4062 bool
4063 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4064 VEC (data_reference_p, heap) **datarefs)
4065 {
4066 unsigned i;
4067 VEC (data_ref_loc, heap) *references;
4068 data_ref_loc *ref;
4069 bool ret = true;
4070 data_reference_p dr;
4071
4072 if (get_references_in_stmt (stmt, &references))
4073 {
4074 VEC_free (data_ref_loc, heap, references);
4075 return false;
4076 }
4077
4078 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4079 {
4080 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4081 gcc_assert (dr != NULL);
4082
4083 /* FIXME -- data dependence analysis does not work correctly for objects
4084 with invariant addresses in loop nests. Let us fail here until the
4085 problem is fixed. */
4086 if (dr_address_invariant_p (dr) && nest)
4087 {
4088 free_data_ref (dr);
4089 if (dump_file && (dump_flags & TDF_DETAILS))
4090 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4091 ret = false;
4092 break;
4093 }
4094
4095 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4096 }
4097 VEC_free (data_ref_loc, heap, references);
4098 return ret;
4099 }
4100
4101 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4102 reference, returns false, otherwise returns true. NEST is the outermost
4103 loop of the loop nest in which the references should be analyzed. */
4104
4105 bool
4106 graphite_find_data_references_in_stmt (struct loop *nest, gimple stmt,
4107 VEC (data_reference_p, heap) **datarefs)
4108 {
4109 unsigned i;
4110 VEC (data_ref_loc, heap) *references;
4111 data_ref_loc *ref;
4112 bool ret = true;
4113 data_reference_p dr;
4114
4115 if (get_references_in_stmt (stmt, &references))
4116 {
4117 VEC_free (data_ref_loc, heap, references);
4118 return false;
4119 }
4120
4121 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4122 {
4123 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4124 gcc_assert (dr != NULL);
4125 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4126 }
4127
4128 VEC_free (data_ref_loc, heap, references);
4129 return ret;
4130 }
4131
4132 /* Search the data references in LOOP, and record the information into
4133 DATAREFS. Returns chrec_dont_know when failing to analyze a
4134 difficult case, returns NULL_TREE otherwise. */
4135
4136 static tree
4137 find_data_references_in_bb (struct loop *loop, basic_block bb,
4138 VEC (data_reference_p, heap) **datarefs)
4139 {
4140 gimple_stmt_iterator bsi;
4141
4142 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4143 {
4144 gimple stmt = gsi_stmt (bsi);
4145
4146 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4147 {
4148 struct data_reference *res;
4149 res = XCNEW (struct data_reference);
4150 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4151
4152 return chrec_dont_know;
4153 }
4154 }
4155
4156 return NULL_TREE;
4157 }
4158
4159 /* Search the data references in LOOP, and record the information into
4160 DATAREFS. Returns chrec_dont_know when failing to analyze a
4161 difficult case, returns NULL_TREE otherwise.
4162
4163 TODO: This function should be made smarter so that it can handle address
4164 arithmetic as if they were array accesses, etc. */
4165
4166 tree
4167 find_data_references_in_loop (struct loop *loop,
4168 VEC (data_reference_p, heap) **datarefs)
4169 {
4170 basic_block bb, *bbs;
4171 unsigned int i;
4172
4173 bbs = get_loop_body_in_dom_order (loop);
4174
4175 for (i = 0; i < loop->num_nodes; i++)
4176 {
4177 bb = bbs[i];
4178
4179 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4180 {
4181 free (bbs);
4182 return chrec_dont_know;
4183 }
4184 }
4185 free (bbs);
4186
4187 return NULL_TREE;
4188 }
4189
4190 /* Recursive helper function. */
4191
4192 static bool
4193 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4194 {
4195 /* Inner loops of the nest should not contain siblings. Example:
4196 when there are two consecutive loops,
4197
4198 | loop_0
4199 | loop_1
4200 | A[{0, +, 1}_1]
4201 | endloop_1
4202 | loop_2
4203 | A[{0, +, 1}_2]
4204 | endloop_2
4205 | endloop_0
4206
4207 the dependence relation cannot be captured by the distance
4208 abstraction. */
4209 if (loop->next)
4210 return false;
4211
4212 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4213 if (loop->inner)
4214 return find_loop_nest_1 (loop->inner, loop_nest);
4215 return true;
4216 }
4217
4218 /* Return false when the LOOP is not well nested. Otherwise return
4219 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4220 contain the loops from the outermost to the innermost, as they will
4221 appear in the classic distance vector. */
4222
4223 bool
4224 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4225 {
4226 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4227 if (loop->inner)
4228 return find_loop_nest_1 (loop->inner, loop_nest);
4229 return true;
4230 }
4231
4232 /* Returns true when the data dependences have been computed, false otherwise.
4233 Given a loop nest LOOP, the following vectors are returned:
4234 DATAREFS is initialized to all the array elements contained in this loop,
4235 DEPENDENCE_RELATIONS contains the relations between the data references.
4236 Compute read-read and self relations if
4237 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4238
4239 bool
4240 compute_data_dependences_for_loop (struct loop *loop,
4241 bool compute_self_and_read_read_dependences,
4242 VEC (loop_p, heap) **loop_nest,
4243 VEC (data_reference_p, heap) **datarefs,
4244 VEC (ddr_p, heap) **dependence_relations)
4245 {
4246 bool res = true;
4247
4248 memset (&dependence_stats, 0, sizeof (dependence_stats));
4249
4250 /* If the loop nest is not well formed, or one of the data references
4251 is not computable, give up without spending time to compute other
4252 dependences. */
4253 if (!loop
4254 || !find_loop_nest (loop, loop_nest)
4255 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4256 {
4257 struct data_dependence_relation *ddr;
4258
4259 /* Insert a single relation into dependence_relations:
4260 chrec_dont_know. */
4261 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4262 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4263 res = false;
4264 }
4265 else
4266 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4267 compute_self_and_read_read_dependences);
4268
4269 if (dump_file && (dump_flags & TDF_STATS))
4270 {
4271 fprintf (dump_file, "Dependence tester statistics:\n");
4272
4273 fprintf (dump_file, "Number of dependence tests: %d\n",
4274 dependence_stats.num_dependence_tests);
4275 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4276 dependence_stats.num_dependence_dependent);
4277 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4278 dependence_stats.num_dependence_independent);
4279 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4280 dependence_stats.num_dependence_undetermined);
4281
4282 fprintf (dump_file, "Number of subscript tests: %d\n",
4283 dependence_stats.num_subscript_tests);
4284 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4285 dependence_stats.num_subscript_undetermined);
4286 fprintf (dump_file, "Number of same subscript function: %d\n",
4287 dependence_stats.num_same_subscript_function);
4288
4289 fprintf (dump_file, "Number of ziv tests: %d\n",
4290 dependence_stats.num_ziv);
4291 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4292 dependence_stats.num_ziv_dependent);
4293 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4294 dependence_stats.num_ziv_independent);
4295 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4296 dependence_stats.num_ziv_unimplemented);
4297
4298 fprintf (dump_file, "Number of siv tests: %d\n",
4299 dependence_stats.num_siv);
4300 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4301 dependence_stats.num_siv_dependent);
4302 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4303 dependence_stats.num_siv_independent);
4304 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4305 dependence_stats.num_siv_unimplemented);
4306
4307 fprintf (dump_file, "Number of miv tests: %d\n",
4308 dependence_stats.num_miv);
4309 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4310 dependence_stats.num_miv_dependent);
4311 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4312 dependence_stats.num_miv_independent);
4313 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4314 dependence_stats.num_miv_unimplemented);
4315 }
4316
4317 return res;
4318 }
4319
4320 /* Returns true when the data dependences for the basic block BB have been
4321 computed, false otherwise.
4322 DATAREFS is initialized to all the array elements contained in this basic
4323 block, DEPENDENCE_RELATIONS contains the relations between the data
4324 references. Compute read-read and self relations if
4325 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4326 bool
4327 compute_data_dependences_for_bb (basic_block bb,
4328 bool compute_self_and_read_read_dependences,
4329 VEC (data_reference_p, heap) **datarefs,
4330 VEC (ddr_p, heap) **dependence_relations)
4331 {
4332 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4333 return false;
4334
4335 compute_all_dependences (*datarefs, dependence_relations, NULL,
4336 compute_self_and_read_read_dependences);
4337 return true;
4338 }
4339
4340 /* Entry point (for testing only). Analyze all the data references
4341 and the dependence relations in LOOP.
4342
4343 The data references are computed first.
4344
4345 A relation on these nodes is represented by a complete graph. Some
4346 of the relations could be of no interest, thus the relations can be
4347 computed on demand.
4348
4349 In the following function we compute all the relations. This is
4350 just a first implementation that is here for:
4351 - for showing how to ask for the dependence relations,
4352 - for the debugging the whole dependence graph,
4353 - for the dejagnu testcases and maintenance.
4354
4355 It is possible to ask only for a part of the graph, avoiding to
4356 compute the whole dependence graph. The computed dependences are
4357 stored in a knowledge base (KB) such that later queries don't
4358 recompute the same information. The implementation of this KB is
4359 transparent to the optimizer, and thus the KB can be changed with a
4360 more efficient implementation, or the KB could be disabled. */
4361 static void
4362 analyze_all_data_dependences (struct loop *loop)
4363 {
4364 unsigned int i;
4365 int nb_data_refs = 10;
4366 VEC (data_reference_p, heap) *datarefs =
4367 VEC_alloc (data_reference_p, heap, nb_data_refs);
4368 VEC (ddr_p, heap) *dependence_relations =
4369 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4370 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4371
4372 /* Compute DDs on the whole function. */
4373 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4374 &dependence_relations);
4375
4376 if (dump_file)
4377 {
4378 dump_data_dependence_relations (dump_file, dependence_relations);
4379 fprintf (dump_file, "\n\n");
4380
4381 if (dump_flags & TDF_DETAILS)
4382 dump_dist_dir_vectors (dump_file, dependence_relations);
4383
4384 if (dump_flags & TDF_STATS)
4385 {
4386 unsigned nb_top_relations = 0;
4387 unsigned nb_bot_relations = 0;
4388 unsigned nb_chrec_relations = 0;
4389 struct data_dependence_relation *ddr;
4390
4391 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4392 {
4393 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4394 nb_top_relations++;
4395
4396 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4397 nb_bot_relations++;
4398
4399 else
4400 nb_chrec_relations++;
4401 }
4402
4403 gather_stats_on_scev_database ();
4404 }
4405 }
4406
4407 VEC_free (loop_p, heap, loop_nest);
4408 free_dependence_relations (dependence_relations);
4409 free_data_refs (datarefs);
4410 }
4411
4412 /* Computes all the data dependences and check that the results of
4413 several analyzers are the same. */
4414
4415 void
4416 tree_check_data_deps (void)
4417 {
4418 loop_iterator li;
4419 struct loop *loop_nest;
4420
4421 FOR_EACH_LOOP (li, loop_nest, 0)
4422 analyze_all_data_dependences (loop_nest);
4423 }
4424
4425 /* Free the memory used by a data dependence relation DDR. */
4426
4427 void
4428 free_dependence_relation (struct data_dependence_relation *ddr)
4429 {
4430 if (ddr == NULL)
4431 return;
4432
4433 if (DDR_SUBSCRIPTS (ddr))
4434 free_subscripts (DDR_SUBSCRIPTS (ddr));
4435 if (DDR_DIST_VECTS (ddr))
4436 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4437 if (DDR_DIR_VECTS (ddr))
4438 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4439
4440 free (ddr);
4441 }
4442
4443 /* Free the memory used by the data dependence relations from
4444 DEPENDENCE_RELATIONS. */
4445
4446 void
4447 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4448 {
4449 unsigned int i;
4450 struct data_dependence_relation *ddr;
4451
4452 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4453 if (ddr)
4454 free_dependence_relation (ddr);
4455
4456 VEC_free (ddr_p, heap, dependence_relations);
4457 }
4458
4459 /* Free the memory used by the data references from DATAREFS. */
4460
4461 void
4462 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4463 {
4464 unsigned int i;
4465 struct data_reference *dr;
4466
4467 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4468 free_data_ref (dr);
4469 VEC_free (data_reference_p, heap, datarefs);
4470 }
4471
4472 \f
4473
4474 /* Dump vertex I in RDG to FILE. */
4475
4476 void
4477 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4478 {
4479 struct vertex *v = &(rdg->vertices[i]);
4480 struct graph_edge *e;
4481
4482 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4483 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4484 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4485
4486 if (v->pred)
4487 for (e = v->pred; e; e = e->pred_next)
4488 fprintf (file, " %d", e->src);
4489
4490 fprintf (file, ") (out:");
4491
4492 if (v->succ)
4493 for (e = v->succ; e; e = e->succ_next)
4494 fprintf (file, " %d", e->dest);
4495
4496 fprintf (file, ")\n");
4497 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4498 fprintf (file, ")\n");
4499 }
4500
4501 /* Call dump_rdg_vertex on stderr. */
4502
4503 DEBUG_FUNCTION void
4504 debug_rdg_vertex (struct graph *rdg, int i)
4505 {
4506 dump_rdg_vertex (stderr, rdg, i);
4507 }
4508
4509 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4510 dumped vertices to that bitmap. */
4511
4512 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4513 {
4514 int i;
4515
4516 fprintf (file, "(%d\n", c);
4517
4518 for (i = 0; i < rdg->n_vertices; i++)
4519 if (rdg->vertices[i].component == c)
4520 {
4521 if (dumped)
4522 bitmap_set_bit (dumped, i);
4523
4524 dump_rdg_vertex (file, rdg, i);
4525 }
4526
4527 fprintf (file, ")\n");
4528 }
4529
4530 /* Call dump_rdg_vertex on stderr. */
4531
4532 DEBUG_FUNCTION void
4533 debug_rdg_component (struct graph *rdg, int c)
4534 {
4535 dump_rdg_component (stderr, rdg, c, NULL);
4536 }
4537
4538 /* Dump the reduced dependence graph RDG to FILE. */
4539
4540 void
4541 dump_rdg (FILE *file, struct graph *rdg)
4542 {
4543 int i;
4544 bitmap dumped = BITMAP_ALLOC (NULL);
4545
4546 fprintf (file, "(rdg\n");
4547
4548 for (i = 0; i < rdg->n_vertices; i++)
4549 if (!bitmap_bit_p (dumped, i))
4550 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4551
4552 fprintf (file, ")\n");
4553 BITMAP_FREE (dumped);
4554 }
4555
4556 /* Call dump_rdg on stderr. */
4557
4558 DEBUG_FUNCTION void
4559 debug_rdg (struct graph *rdg)
4560 {
4561 dump_rdg (stderr, rdg);
4562 }
4563
4564 static void
4565 dot_rdg_1 (FILE *file, struct graph *rdg)
4566 {
4567 int i;
4568
4569 fprintf (file, "digraph RDG {\n");
4570
4571 for (i = 0; i < rdg->n_vertices; i++)
4572 {
4573 struct vertex *v = &(rdg->vertices[i]);
4574 struct graph_edge *e;
4575
4576 /* Highlight reads from memory. */
4577 if (RDG_MEM_READS_STMT (rdg, i))
4578 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4579
4580 /* Highlight stores to memory. */
4581 if (RDG_MEM_WRITE_STMT (rdg, i))
4582 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4583
4584 if (v->succ)
4585 for (e = v->succ; e; e = e->succ_next)
4586 switch (RDGE_TYPE (e))
4587 {
4588 case input_dd:
4589 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4590 break;
4591
4592 case output_dd:
4593 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4594 break;
4595
4596 case flow_dd:
4597 /* These are the most common dependences: don't print these. */
4598 fprintf (file, "%d -> %d \n", i, e->dest);
4599 break;
4600
4601 case anti_dd:
4602 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4603 break;
4604
4605 default:
4606 gcc_unreachable ();
4607 }
4608 }
4609
4610 fprintf (file, "}\n\n");
4611 }
4612
4613 /* Display the Reduced Dependence Graph using dotty. */
4614 extern void dot_rdg (struct graph *);
4615
4616 DEBUG_FUNCTION void
4617 dot_rdg (struct graph *rdg)
4618 {
4619 /* When debugging, enable the following code. This cannot be used
4620 in production compilers because it calls "system". */
4621 #if 0
4622 FILE *file = fopen ("/tmp/rdg.dot", "w");
4623 gcc_assert (file != NULL);
4624
4625 dot_rdg_1 (file, rdg);
4626 fclose (file);
4627
4628 system ("dotty /tmp/rdg.dot &");
4629 #else
4630 dot_rdg_1 (stderr, rdg);
4631 #endif
4632 }
4633
4634 /* This structure is used for recording the mapping statement index in
4635 the RDG. */
4636
4637 struct GTY(()) rdg_vertex_info
4638 {
4639 gimple stmt;
4640 int index;
4641 };
4642
4643 /* Returns the index of STMT in RDG. */
4644
4645 int
4646 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4647 {
4648 struct rdg_vertex_info rvi, *slot;
4649
4650 rvi.stmt = stmt;
4651 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4652
4653 if (!slot)
4654 return -1;
4655
4656 return slot->index;
4657 }
4658
4659 /* Creates an edge in RDG for each distance vector from DDR. The
4660 order that we keep track of in the RDG is the order in which
4661 statements have to be executed. */
4662
4663 static void
4664 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4665 {
4666 struct graph_edge *e;
4667 int va, vb;
4668 data_reference_p dra = DDR_A (ddr);
4669 data_reference_p drb = DDR_B (ddr);
4670 unsigned level = ddr_dependence_level (ddr);
4671
4672 /* For non scalar dependences, when the dependence is REVERSED,
4673 statement B has to be executed before statement A. */
4674 if (level > 0
4675 && !DDR_REVERSED_P (ddr))
4676 {
4677 data_reference_p tmp = dra;
4678 dra = drb;
4679 drb = tmp;
4680 }
4681
4682 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4683 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4684
4685 if (va < 0 || vb < 0)
4686 return;
4687
4688 e = add_edge (rdg, va, vb);
4689 e->data = XNEW (struct rdg_edge);
4690
4691 RDGE_LEVEL (e) = level;
4692 RDGE_RELATION (e) = ddr;
4693
4694 /* Determines the type of the data dependence. */
4695 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4696 RDGE_TYPE (e) = input_dd;
4697 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4698 RDGE_TYPE (e) = output_dd;
4699 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4700 RDGE_TYPE (e) = flow_dd;
4701 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4702 RDGE_TYPE (e) = anti_dd;
4703 }
4704
4705 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4706 the index of DEF in RDG. */
4707
4708 static void
4709 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4710 {
4711 use_operand_p imm_use_p;
4712 imm_use_iterator iterator;
4713
4714 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4715 {
4716 struct graph_edge *e;
4717 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4718
4719 if (use < 0)
4720 continue;
4721
4722 e = add_edge (rdg, idef, use);
4723 e->data = XNEW (struct rdg_edge);
4724 RDGE_TYPE (e) = flow_dd;
4725 RDGE_RELATION (e) = NULL;
4726 }
4727 }
4728
4729 /* Creates the edges of the reduced dependence graph RDG. */
4730
4731 static void
4732 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4733 {
4734 int i;
4735 struct data_dependence_relation *ddr;
4736 def_operand_p def_p;
4737 ssa_op_iter iter;
4738
4739 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4740 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4741 create_rdg_edge_for_ddr (rdg, ddr);
4742
4743 for (i = 0; i < rdg->n_vertices; i++)
4744 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4745 iter, SSA_OP_DEF)
4746 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4747 }
4748
4749 /* Build the vertices of the reduced dependence graph RDG. */
4750
4751 void
4752 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4753 {
4754 int i, j;
4755 gimple stmt;
4756
4757 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4758 {
4759 VEC (data_ref_loc, heap) *references;
4760 data_ref_loc *ref;
4761 struct vertex *v = &(rdg->vertices[i]);
4762 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4763 struct rdg_vertex_info **slot;
4764
4765 rvi->stmt = stmt;
4766 rvi->index = i;
4767 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4768
4769 if (!*slot)
4770 *slot = rvi;
4771 else
4772 free (rvi);
4773
4774 v->data = XNEW (struct rdg_vertex);
4775 RDG_STMT (rdg, i) = stmt;
4776
4777 RDG_MEM_WRITE_STMT (rdg, i) = false;
4778 RDG_MEM_READS_STMT (rdg, i) = false;
4779 if (gimple_code (stmt) == GIMPLE_PHI)
4780 continue;
4781
4782 get_references_in_stmt (stmt, &references);
4783 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4784 if (!ref->is_read)
4785 RDG_MEM_WRITE_STMT (rdg, i) = true;
4786 else
4787 RDG_MEM_READS_STMT (rdg, i) = true;
4788
4789 VEC_free (data_ref_loc, heap, references);
4790 }
4791 }
4792
4793 /* Initialize STMTS with all the statements of LOOP. When
4794 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4795 which we discover statements is important as
4796 generate_loops_for_partition is using the same traversal for
4797 identifying statements. */
4798
4799 static void
4800 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4801 {
4802 unsigned int i;
4803 basic_block *bbs = get_loop_body_in_dom_order (loop);
4804
4805 for (i = 0; i < loop->num_nodes; i++)
4806 {
4807 basic_block bb = bbs[i];
4808 gimple_stmt_iterator bsi;
4809 gimple stmt;
4810
4811 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4812 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4813
4814 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4815 {
4816 stmt = gsi_stmt (bsi);
4817 if (gimple_code (stmt) != GIMPLE_LABEL)
4818 VEC_safe_push (gimple, heap, *stmts, stmt);
4819 }
4820 }
4821
4822 free (bbs);
4823 }
4824
4825 /* Returns true when all the dependences are computable. */
4826
4827 static bool
4828 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4829 {
4830 ddr_p ddr;
4831 unsigned int i;
4832
4833 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4834 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4835 return false;
4836
4837 return true;
4838 }
4839
4840 /* Computes a hash function for element ELT. */
4841
4842 static hashval_t
4843 hash_stmt_vertex_info (const void *elt)
4844 {
4845 const struct rdg_vertex_info *const rvi =
4846 (const struct rdg_vertex_info *) elt;
4847 gimple stmt = rvi->stmt;
4848
4849 return htab_hash_pointer (stmt);
4850 }
4851
4852 /* Compares database elements E1 and E2. */
4853
4854 static int
4855 eq_stmt_vertex_info (const void *e1, const void *e2)
4856 {
4857 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4858 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4859
4860 return elt1->stmt == elt2->stmt;
4861 }
4862
4863 /* Free the element E. */
4864
4865 static void
4866 hash_stmt_vertex_del (void *e)
4867 {
4868 free (e);
4869 }
4870
4871 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4872 statement of the loop nest, and one edge per data dependence or
4873 scalar dependence. */
4874
4875 struct graph *
4876 build_empty_rdg (int n_stmts)
4877 {
4878 int nb_data_refs = 10;
4879 struct graph *rdg = new_graph (n_stmts);
4880
4881 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4882 eq_stmt_vertex_info, hash_stmt_vertex_del);
4883 return rdg;
4884 }
4885
4886 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4887 statement of the loop nest, and one edge per data dependence or
4888 scalar dependence. */
4889
4890 struct graph *
4891 build_rdg (struct loop *loop,
4892 VEC (loop_p, heap) **loop_nest,
4893 VEC (ddr_p, heap) **dependence_relations,
4894 VEC (data_reference_p, heap) **datarefs)
4895 {
4896 struct graph *rdg = NULL;
4897 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
4898
4899 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
4900 dependence_relations);
4901
4902 if (known_dependences_p (*dependence_relations))
4903 {
4904 stmts_from_loop (loop, &stmts);
4905 rdg = build_empty_rdg (VEC_length (gimple, stmts));
4906 create_rdg_vertices (rdg, stmts);
4907 create_rdg_edges (rdg, *dependence_relations);
4908 }
4909
4910 VEC_free (gimple, heap, stmts);
4911 return rdg;
4912 }
4913
4914 /* Free the reduced dependence graph RDG. */
4915
4916 void
4917 free_rdg (struct graph *rdg)
4918 {
4919 int i;
4920
4921 for (i = 0; i < rdg->n_vertices; i++)
4922 {
4923 struct vertex *v = &(rdg->vertices[i]);
4924 struct graph_edge *e;
4925
4926 for (e = v->succ; e; e = e->succ_next)
4927 if (e->data)
4928 free (e->data);
4929
4930 if (v->data)
4931 free (v->data);
4932 }
4933
4934 htab_delete (rdg->indices);
4935 free_graph (rdg);
4936 }
4937
4938 /* Initialize STMTS with all the statements of LOOP that contain a
4939 store to memory. */
4940
4941 void
4942 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4943 {
4944 unsigned int i;
4945 basic_block *bbs = get_loop_body_in_dom_order (loop);
4946
4947 for (i = 0; i < loop->num_nodes; i++)
4948 {
4949 basic_block bb = bbs[i];
4950 gimple_stmt_iterator bsi;
4951
4952 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4953 if (gimple_vdef (gsi_stmt (bsi)))
4954 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4955 }
4956
4957 free (bbs);
4958 }
4959
4960 /* Returns true when the statement at STMT is of the form "A[i] = 0"
4961 that contains a data reference on its LHS with a stride of the same
4962 size as its unit type. */
4963
4964 bool
4965 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
4966 {
4967 tree op0, op1;
4968 bool res;
4969 struct data_reference *dr;
4970
4971 if (!stmt
4972 || !gimple_vdef (stmt)
4973 || !is_gimple_assign (stmt)
4974 || !gimple_assign_single_p (stmt)
4975 || !(op1 = gimple_assign_rhs1 (stmt))
4976 || !(integer_zerop (op1) || real_zerop (op1)))
4977 return false;
4978
4979 dr = XCNEW (struct data_reference);
4980 op0 = gimple_assign_lhs (stmt);
4981
4982 DR_STMT (dr) = stmt;
4983 DR_REF (dr) = op0;
4984
4985 res = dr_analyze_innermost (dr)
4986 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
4987
4988 free_data_ref (dr);
4989 return res;
4990 }
4991
4992 /* Initialize STMTS with all the statements of LOOP that contain a
4993 store to memory of the form "A[i] = 0". */
4994
4995 void
4996 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4997 {
4998 unsigned int i;
4999 basic_block bb;
5000 gimple_stmt_iterator si;
5001 gimple stmt;
5002 basic_block *bbs = get_loop_body_in_dom_order (loop);
5003
5004 for (i = 0; i < loop->num_nodes; i++)
5005 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5006 if ((stmt = gsi_stmt (si))
5007 && stmt_with_adjacent_zero_store_dr_p (stmt))
5008 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5009
5010 free (bbs);
5011 }
5012
5013 /* For a data reference REF, return the declaration of its base
5014 address or NULL_TREE if the base is not determined. */
5015
5016 static inline tree
5017 ref_base_address (gimple stmt, data_ref_loc *ref)
5018 {
5019 tree base = NULL_TREE;
5020 tree base_address;
5021 struct data_reference *dr = XCNEW (struct data_reference);
5022
5023 DR_STMT (dr) = stmt;
5024 DR_REF (dr) = *ref->pos;
5025 dr_analyze_innermost (dr);
5026 base_address = DR_BASE_ADDRESS (dr);
5027
5028 if (!base_address)
5029 goto end;
5030
5031 switch (TREE_CODE (base_address))
5032 {
5033 case ADDR_EXPR:
5034 base = TREE_OPERAND (base_address, 0);
5035 break;
5036
5037 default:
5038 base = base_address;
5039 break;
5040 }
5041
5042 end:
5043 free_data_ref (dr);
5044 return base;
5045 }
5046
5047 /* Determines whether the statement from vertex V of the RDG has a
5048 definition used outside the loop that contains this statement. */
5049
5050 bool
5051 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5052 {
5053 gimple stmt = RDG_STMT (rdg, v);
5054 struct loop *loop = loop_containing_stmt (stmt);
5055 use_operand_p imm_use_p;
5056 imm_use_iterator iterator;
5057 ssa_op_iter it;
5058 def_operand_p def_p;
5059
5060 if (!loop)
5061 return true;
5062
5063 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5064 {
5065 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5066 {
5067 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5068 return true;
5069 }
5070 }
5071
5072 return false;
5073 }
5074
5075 /* Determines whether statements S1 and S2 access to similar memory
5076 locations. Two memory accesses are considered similar when they
5077 have the same base address declaration, i.e. when their
5078 ref_base_address is the same. */
5079
5080 bool
5081 have_similar_memory_accesses (gimple s1, gimple s2)
5082 {
5083 bool res = false;
5084 unsigned i, j;
5085 VEC (data_ref_loc, heap) *refs1, *refs2;
5086 data_ref_loc *ref1, *ref2;
5087
5088 get_references_in_stmt (s1, &refs1);
5089 get_references_in_stmt (s2, &refs2);
5090
5091 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5092 {
5093 tree base1 = ref_base_address (s1, ref1);
5094
5095 if (base1)
5096 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5097 if (base1 == ref_base_address (s2, ref2))
5098 {
5099 res = true;
5100 goto end;
5101 }
5102 }
5103
5104 end:
5105 VEC_free (data_ref_loc, heap, refs1);
5106 VEC_free (data_ref_loc, heap, refs2);
5107 return res;
5108 }
5109
5110 /* Helper function for the hashtab. */
5111
5112 static int
5113 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5114 {
5115 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5116 CONST_CAST_GIMPLE ((const_gimple) s2));
5117 }
5118
5119 /* Helper function for the hashtab. */
5120
5121 static hashval_t
5122 ref_base_address_1 (const void *s)
5123 {
5124 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5125 unsigned i;
5126 VEC (data_ref_loc, heap) *refs;
5127 data_ref_loc *ref;
5128 hashval_t res = 0;
5129
5130 get_references_in_stmt (stmt, &refs);
5131
5132 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5133 if (!ref->is_read)
5134 {
5135 res = htab_hash_pointer (ref_base_address (stmt, ref));
5136 break;
5137 }
5138
5139 VEC_free (data_ref_loc, heap, refs);
5140 return res;
5141 }
5142
5143 /* Try to remove duplicated write data references from STMTS. */
5144
5145 void
5146 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5147 {
5148 unsigned i;
5149 gimple stmt;
5150 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5151 have_similar_memory_accesses_1, NULL);
5152
5153 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5154 {
5155 void **slot;
5156
5157 slot = htab_find_slot (seen, stmt, INSERT);
5158
5159 if (*slot)
5160 VEC_ordered_remove (gimple, *stmts, i);
5161 else
5162 {
5163 *slot = (void *) stmt;
5164 i++;
5165 }
5166 }
5167
5168 htab_delete (seen);
5169 }
5170
5171 /* Returns the index of PARAMETER in the parameters vector of the
5172 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5173
5174 int
5175 access_matrix_get_index_for_parameter (tree parameter,
5176 struct access_matrix *access_matrix)
5177 {
5178 int i;
5179 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5180 tree lambda_parameter;
5181
5182 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5183 if (lambda_parameter == parameter)
5184 return i + AM_NB_INDUCTION_VARS (access_matrix);
5185
5186 return -1;
5187 }