Remove the lambda framework and make -ftree-loop-linear an alias of -floop-interchange.
[gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
31
32 The goals of this analysis are:
33
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
37
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
40
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
46
47 - to define a knowledge base for storing the data dependence
48 information,
49
50 - to define an interface to access this data.
51
52
53 Definitions:
54
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
64
65 References:
66
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
73
74
75 */
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87
88 static struct datadep_stats
89 {
90 int num_dependence_tests;
91 int num_dependence_dependent;
92 int num_dependence_independent;
93 int num_dependence_undetermined;
94
95 int num_subscript_tests;
96 int num_subscript_undetermined;
97 int num_same_subscript_function;
98
99 int num_ziv;
100 int num_ziv_independent;
101 int num_ziv_dependent;
102 int num_ziv_unimplemented;
103
104 int num_siv;
105 int num_siv_independent;
106 int num_siv_dependent;
107 int num_siv_unimplemented;
108
109 int num_miv;
110 int num_miv_independent;
111 int num_miv_dependent;
112 int num_miv_unimplemented;
113 } dependence_stats;
114
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
116 struct data_reference *,
117 struct data_reference *,
118 struct loop *);
119 /* Returns true iff A divides B. */
120
121 static inline bool
122 tree_fold_divides_p (const_tree a, const_tree b)
123 {
124 gcc_assert (TREE_CODE (a) == INTEGER_CST);
125 gcc_assert (TREE_CODE (b) == INTEGER_CST);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
127 }
128
129 /* Returns true iff A divides B. */
130
131 static inline bool
132 int_divides_p (int a, int b)
133 {
134 return ((b % a) == 0);
135 }
136
137 \f
138
139 /* Dump into FILE all the data references from DATAREFS. */
140
141 void
142 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
143 {
144 unsigned int i;
145 struct data_reference *dr;
146
147 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
148 dump_data_reference (file, dr);
149 }
150
151 /* Dump into STDERR all the data references from DATAREFS. */
152
153 DEBUG_FUNCTION void
154 debug_data_references (VEC (data_reference_p, heap) *datarefs)
155 {
156 dump_data_references (stderr, datarefs);
157 }
158
159 /* Dump to STDERR all the dependence relations from DDRS. */
160
161 DEBUG_FUNCTION void
162 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
163 {
164 dump_data_dependence_relations (stderr, ddrs);
165 }
166
167 /* Dump into FILE all the dependence relations from DDRS. */
168
169 void
170 dump_data_dependence_relations (FILE *file,
171 VEC (ddr_p, heap) *ddrs)
172 {
173 unsigned int i;
174 struct data_dependence_relation *ddr;
175
176 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
177 dump_data_dependence_relation (file, ddr);
178 }
179
180 /* Print to STDERR the data_reference DR. */
181
182 DEBUG_FUNCTION void
183 debug_data_reference (struct data_reference *dr)
184 {
185 dump_data_reference (stderr, dr);
186 }
187
188 /* Dump function for a DATA_REFERENCE structure. */
189
190 void
191 dump_data_reference (FILE *outf,
192 struct data_reference *dr)
193 {
194 unsigned int i;
195
196 fprintf (outf, "#(Data Ref: \n");
197 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
198 fprintf (outf, "# stmt: ");
199 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
200 fprintf (outf, "# ref: ");
201 print_generic_stmt (outf, DR_REF (dr), 0);
202 fprintf (outf, "# base_object: ");
203 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
204
205 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
206 {
207 fprintf (outf, "# Access function %d: ", i);
208 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
209 }
210 fprintf (outf, "#)\n");
211 }
212
213 /* Dumps the affine function described by FN to the file OUTF. */
214
215 static void
216 dump_affine_function (FILE *outf, affine_fn fn)
217 {
218 unsigned i;
219 tree coef;
220
221 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
222 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
223 {
224 fprintf (outf, " + ");
225 print_generic_expr (outf, coef, TDF_SLIM);
226 fprintf (outf, " * x_%u", i);
227 }
228 }
229
230 /* Dumps the conflict function CF to the file OUTF. */
231
232 static void
233 dump_conflict_function (FILE *outf, conflict_function *cf)
234 {
235 unsigned i;
236
237 if (cf->n == NO_DEPENDENCE)
238 fprintf (outf, "no dependence\n");
239 else if (cf->n == NOT_KNOWN)
240 fprintf (outf, "not known\n");
241 else
242 {
243 for (i = 0; i < cf->n; i++)
244 {
245 fprintf (outf, "[");
246 dump_affine_function (outf, cf->fns[i]);
247 fprintf (outf, "]\n");
248 }
249 }
250 }
251
252 /* Dump function for a SUBSCRIPT structure. */
253
254 void
255 dump_subscript (FILE *outf, struct subscript *subscript)
256 {
257 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
258
259 fprintf (outf, "\n (subscript \n");
260 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
261 dump_conflict_function (outf, cf);
262 if (CF_NONTRIVIAL_P (cf))
263 {
264 tree last_iteration = SUB_LAST_CONFLICT (subscript);
265 fprintf (outf, " last_conflict: ");
266 print_generic_stmt (outf, last_iteration, 0);
267 }
268
269 cf = SUB_CONFLICTS_IN_B (subscript);
270 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
271 dump_conflict_function (outf, cf);
272 if (CF_NONTRIVIAL_P (cf))
273 {
274 tree last_iteration = SUB_LAST_CONFLICT (subscript);
275 fprintf (outf, " last_conflict: ");
276 print_generic_stmt (outf, last_iteration, 0);
277 }
278
279 fprintf (outf, " (Subscript distance: ");
280 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
281 fprintf (outf, " )\n");
282 fprintf (outf, " )\n");
283 }
284
285 /* Print the classic direction vector DIRV to OUTF. */
286
287 void
288 print_direction_vector (FILE *outf,
289 lambda_vector dirv,
290 int length)
291 {
292 int eq;
293
294 for (eq = 0; eq < length; eq++)
295 {
296 enum data_dependence_direction dir = ((enum data_dependence_direction)
297 dirv[eq]);
298
299 switch (dir)
300 {
301 case dir_positive:
302 fprintf (outf, " +");
303 break;
304 case dir_negative:
305 fprintf (outf, " -");
306 break;
307 case dir_equal:
308 fprintf (outf, " =");
309 break;
310 case dir_positive_or_equal:
311 fprintf (outf, " +=");
312 break;
313 case dir_positive_or_negative:
314 fprintf (outf, " +-");
315 break;
316 case dir_negative_or_equal:
317 fprintf (outf, " -=");
318 break;
319 case dir_star:
320 fprintf (outf, " *");
321 break;
322 default:
323 fprintf (outf, "indep");
324 break;
325 }
326 }
327 fprintf (outf, "\n");
328 }
329
330 /* Print a vector of direction vectors. */
331
332 void
333 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
334 int length)
335 {
336 unsigned j;
337 lambda_vector v;
338
339 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
340 print_direction_vector (outf, v, length);
341 }
342
343 /* Print out a vector VEC of length N to OUTFILE. */
344
345 static inline void
346 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
347 {
348 int i;
349
350 for (i = 0; i < n; i++)
351 fprintf (outfile, "%3d ", vector[i]);
352 fprintf (outfile, "\n");
353 }
354
355 /* Print a vector of distance vectors. */
356
357 void
358 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
359 int length)
360 {
361 unsigned j;
362 lambda_vector v;
363
364 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
365 print_lambda_vector (outf, v, length);
366 }
367
368 /* Debug version. */
369
370 DEBUG_FUNCTION void
371 debug_data_dependence_relation (struct data_dependence_relation *ddr)
372 {
373 dump_data_dependence_relation (stderr, ddr);
374 }
375
376 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
377
378 void
379 dump_data_dependence_relation (FILE *outf,
380 struct data_dependence_relation *ddr)
381 {
382 struct data_reference *dra, *drb;
383
384 fprintf (outf, "(Data Dep: \n");
385
386 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
387 {
388 if (ddr)
389 {
390 dra = DDR_A (ddr);
391 drb = DDR_B (ddr);
392 if (dra)
393 dump_data_reference (outf, dra);
394 else
395 fprintf (outf, " (nil)\n");
396 if (drb)
397 dump_data_reference (outf, drb);
398 else
399 fprintf (outf, " (nil)\n");
400 }
401 fprintf (outf, " (don't know)\n)\n");
402 return;
403 }
404
405 dra = DDR_A (ddr);
406 drb = DDR_B (ddr);
407 dump_data_reference (outf, dra);
408 dump_data_reference (outf, drb);
409
410 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
411 fprintf (outf, " (no dependence)\n");
412
413 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
414 {
415 unsigned int i;
416 struct loop *loopi;
417
418 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
419 {
420 fprintf (outf, " access_fn_A: ");
421 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
422 fprintf (outf, " access_fn_B: ");
423 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
424 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
425 }
426
427 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
428 fprintf (outf, " loop nest: (");
429 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
430 fprintf (outf, "%d ", loopi->num);
431 fprintf (outf, ")\n");
432
433 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
434 {
435 fprintf (outf, " distance_vector: ");
436 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
437 DDR_NB_LOOPS (ddr));
438 }
439
440 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
441 {
442 fprintf (outf, " direction_vector: ");
443 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
444 DDR_NB_LOOPS (ddr));
445 }
446 }
447
448 fprintf (outf, ")\n");
449 }
450
451 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
452
453 void
454 dump_data_dependence_direction (FILE *file,
455 enum data_dependence_direction dir)
456 {
457 switch (dir)
458 {
459 case dir_positive:
460 fprintf (file, "+");
461 break;
462
463 case dir_negative:
464 fprintf (file, "-");
465 break;
466
467 case dir_equal:
468 fprintf (file, "=");
469 break;
470
471 case dir_positive_or_negative:
472 fprintf (file, "+-");
473 break;
474
475 case dir_positive_or_equal:
476 fprintf (file, "+=");
477 break;
478
479 case dir_negative_or_equal:
480 fprintf (file, "-=");
481 break;
482
483 case dir_star:
484 fprintf (file, "*");
485 break;
486
487 default:
488 break;
489 }
490 }
491
492 /* Dumps the distance and direction vectors in FILE. DDRS contains
493 the dependence relations, and VECT_SIZE is the size of the
494 dependence vectors, or in other words the number of loops in the
495 considered nest. */
496
497 void
498 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
499 {
500 unsigned int i, j;
501 struct data_dependence_relation *ddr;
502 lambda_vector v;
503
504 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
505 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
506 {
507 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
508 {
509 fprintf (file, "DISTANCE_V (");
510 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
511 fprintf (file, ")\n");
512 }
513
514 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
515 {
516 fprintf (file, "DIRECTION_V (");
517 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
518 fprintf (file, ")\n");
519 }
520 }
521
522 fprintf (file, "\n\n");
523 }
524
525 /* Dumps the data dependence relations DDRS in FILE. */
526
527 void
528 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
529 {
530 unsigned int i;
531 struct data_dependence_relation *ddr;
532
533 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
534 dump_data_dependence_relation (file, ddr);
535
536 fprintf (file, "\n\n");
537 }
538
539 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
540 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
541 constant of type ssizetype, and returns true. If we cannot do this
542 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
543 is returned. */
544
545 static bool
546 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
547 tree *var, tree *off)
548 {
549 tree var0, var1;
550 tree off0, off1;
551 enum tree_code ocode = code;
552
553 *var = NULL_TREE;
554 *off = NULL_TREE;
555
556 switch (code)
557 {
558 case INTEGER_CST:
559 *var = build_int_cst (type, 0);
560 *off = fold_convert (ssizetype, op0);
561 return true;
562
563 case POINTER_PLUS_EXPR:
564 ocode = PLUS_EXPR;
565 /* FALLTHROUGH */
566 case PLUS_EXPR:
567 case MINUS_EXPR:
568 split_constant_offset (op0, &var0, &off0);
569 split_constant_offset (op1, &var1, &off1);
570 *var = fold_build2 (code, type, var0, var1);
571 *off = size_binop (ocode, off0, off1);
572 return true;
573
574 case MULT_EXPR:
575 if (TREE_CODE (op1) != INTEGER_CST)
576 return false;
577
578 split_constant_offset (op0, &var0, &off0);
579 *var = fold_build2 (MULT_EXPR, type, var0, op1);
580 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
581 return true;
582
583 case ADDR_EXPR:
584 {
585 tree base, poffset;
586 HOST_WIDE_INT pbitsize, pbitpos;
587 enum machine_mode pmode;
588 int punsignedp, pvolatilep;
589
590 op0 = TREE_OPERAND (op0, 0);
591 if (!handled_component_p (op0))
592 return false;
593
594 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
595 &pmode, &punsignedp, &pvolatilep, false);
596
597 if (pbitpos % BITS_PER_UNIT != 0)
598 return false;
599 base = build_fold_addr_expr (base);
600 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
601
602 if (poffset)
603 {
604 split_constant_offset (poffset, &poffset, &off1);
605 off0 = size_binop (PLUS_EXPR, off0, off1);
606 if (POINTER_TYPE_P (TREE_TYPE (base)))
607 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
608 base, fold_convert (sizetype, poffset));
609 else
610 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
611 fold_convert (TREE_TYPE (base), poffset));
612 }
613
614 var0 = fold_convert (type, base);
615
616 /* If variable length types are involved, punt, otherwise casts
617 might be converted into ARRAY_REFs in gimplify_conversion.
618 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
619 possibly no longer appears in current GIMPLE, might resurface.
620 This perhaps could run
621 if (CONVERT_EXPR_P (var0))
622 {
623 gimplify_conversion (&var0);
624 // Attempt to fill in any within var0 found ARRAY_REF's
625 // element size from corresponding op embedded ARRAY_REF,
626 // if unsuccessful, just punt.
627 } */
628 while (POINTER_TYPE_P (type))
629 type = TREE_TYPE (type);
630 if (int_size_in_bytes (type) < 0)
631 return false;
632
633 *var = var0;
634 *off = off0;
635 return true;
636 }
637
638 case SSA_NAME:
639 {
640 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
641 enum tree_code subcode;
642
643 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
644 return false;
645
646 var0 = gimple_assign_rhs1 (def_stmt);
647 subcode = gimple_assign_rhs_code (def_stmt);
648 var1 = gimple_assign_rhs2 (def_stmt);
649
650 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
651 }
652 CASE_CONVERT:
653 {
654 /* We must not introduce undefined overflow, and we must not change the value.
655 Hence we're okay if the inner type doesn't overflow to start with
656 (pointer or signed), the outer type also is an integer or pointer
657 and the outer precision is at least as large as the inner. */
658 tree itype = TREE_TYPE (op0);
659 if ((POINTER_TYPE_P (itype)
660 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
661 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
662 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
663 {
664 split_constant_offset (op0, &var0, off);
665 *var = fold_convert (type, var0);
666 return true;
667 }
668 return false;
669 }
670
671 default:
672 return false;
673 }
674 }
675
676 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
677 will be ssizetype. */
678
679 void
680 split_constant_offset (tree exp, tree *var, tree *off)
681 {
682 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
683 enum tree_code code;
684
685 *var = exp;
686 *off = ssize_int (0);
687 STRIP_NOPS (exp);
688
689 if (automatically_generated_chrec_p (exp))
690 return;
691
692 otype = TREE_TYPE (exp);
693 code = TREE_CODE (exp);
694 extract_ops_from_tree (exp, &code, &op0, &op1);
695 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
696 {
697 *var = fold_convert (type, e);
698 *off = o;
699 }
700 }
701
702 /* Returns the address ADDR of an object in a canonical shape (without nop
703 casts, and with type of pointer to the object). */
704
705 static tree
706 canonicalize_base_object_address (tree addr)
707 {
708 tree orig = addr;
709
710 STRIP_NOPS (addr);
711
712 /* The base address may be obtained by casting from integer, in that case
713 keep the cast. */
714 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
715 return orig;
716
717 if (TREE_CODE (addr) != ADDR_EXPR)
718 return addr;
719
720 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
721 }
722
723 /* Analyzes the behavior of the memory reference DR in the innermost loop or
724 basic block that contains it. Returns true if analysis succeed or false
725 otherwise. */
726
727 bool
728 dr_analyze_innermost (struct data_reference *dr)
729 {
730 gimple stmt = DR_STMT (dr);
731 struct loop *loop = loop_containing_stmt (stmt);
732 tree ref = DR_REF (dr);
733 HOST_WIDE_INT pbitsize, pbitpos;
734 tree base, poffset;
735 enum machine_mode pmode;
736 int punsignedp, pvolatilep;
737 affine_iv base_iv, offset_iv;
738 tree init, dinit, step;
739 bool in_loop = (loop && loop->num);
740
741 if (dump_file && (dump_flags & TDF_DETAILS))
742 fprintf (dump_file, "analyze_innermost: ");
743
744 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
745 &pmode, &punsignedp, &pvolatilep, false);
746 gcc_assert (base != NULL_TREE);
747
748 if (pbitpos % BITS_PER_UNIT != 0)
749 {
750 if (dump_file && (dump_flags & TDF_DETAILS))
751 fprintf (dump_file, "failed: bit offset alignment.\n");
752 return false;
753 }
754
755 if (TREE_CODE (base) == MEM_REF)
756 {
757 if (!integer_zerop (TREE_OPERAND (base, 1)))
758 {
759 if (!poffset)
760 {
761 double_int moff = mem_ref_offset (base);
762 poffset = double_int_to_tree (sizetype, moff);
763 }
764 else
765 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
766 }
767 base = TREE_OPERAND (base, 0);
768 }
769 else
770 base = build_fold_addr_expr (base);
771 if (in_loop)
772 {
773 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
774 false))
775 {
776 if (dump_file && (dump_flags & TDF_DETAILS))
777 fprintf (dump_file, "failed: evolution of base is not affine.\n");
778 return false;
779 }
780 }
781 else
782 {
783 base_iv.base = base;
784 base_iv.step = ssize_int (0);
785 base_iv.no_overflow = true;
786 }
787
788 if (!poffset)
789 {
790 offset_iv.base = ssize_int (0);
791 offset_iv.step = ssize_int (0);
792 }
793 else
794 {
795 if (!in_loop)
796 {
797 offset_iv.base = poffset;
798 offset_iv.step = ssize_int (0);
799 }
800 else if (!simple_iv (loop, loop_containing_stmt (stmt),
801 poffset, &offset_iv, false))
802 {
803 if (dump_file && (dump_flags & TDF_DETAILS))
804 fprintf (dump_file, "failed: evolution of offset is not"
805 " affine.\n");
806 return false;
807 }
808 }
809
810 init = ssize_int (pbitpos / BITS_PER_UNIT);
811 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
812 init = size_binop (PLUS_EXPR, init, dinit);
813 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
814 init = size_binop (PLUS_EXPR, init, dinit);
815
816 step = size_binop (PLUS_EXPR,
817 fold_convert (ssizetype, base_iv.step),
818 fold_convert (ssizetype, offset_iv.step));
819
820 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
821
822 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
823 DR_INIT (dr) = init;
824 DR_STEP (dr) = step;
825
826 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
827
828 if (dump_file && (dump_flags & TDF_DETAILS))
829 fprintf (dump_file, "success.\n");
830
831 return true;
832 }
833
834 /* Determines the base object and the list of indices of memory reference
835 DR, analyzed in LOOP and instantiated in loop nest NEST. */
836
837 static void
838 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
839 {
840 VEC (tree, heap) *access_fns = NULL;
841 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
842 tree base, off, access_fn = NULL_TREE;
843 basic_block before_loop = NULL;
844
845 if (nest)
846 before_loop = block_before_loop (nest);
847
848 while (handled_component_p (aref))
849 {
850 if (TREE_CODE (aref) == ARRAY_REF)
851 {
852 op = TREE_OPERAND (aref, 1);
853 if (nest)
854 {
855 access_fn = analyze_scalar_evolution (loop, op);
856 access_fn = instantiate_scev (before_loop, loop, access_fn);
857 VEC_safe_push (tree, heap, access_fns, access_fn);
858 }
859
860 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
861 }
862
863 aref = TREE_OPERAND (aref, 0);
864 }
865
866 if (nest
867 && (INDIRECT_REF_P (aref)
868 || TREE_CODE (aref) == MEM_REF))
869 {
870 op = TREE_OPERAND (aref, 0);
871 access_fn = analyze_scalar_evolution (loop, op);
872 access_fn = instantiate_scev (before_loop, loop, access_fn);
873 base = initial_condition (access_fn);
874 split_constant_offset (base, &base, &off);
875 if (TREE_CODE (aref) == MEM_REF)
876 off = size_binop (PLUS_EXPR, off,
877 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
878 access_fn = chrec_replace_initial_condition (access_fn,
879 fold_convert (TREE_TYPE (base), off));
880
881 TREE_OPERAND (aref, 0) = base;
882 VEC_safe_push (tree, heap, access_fns, access_fn);
883 }
884
885 if (TREE_CODE (aref) == MEM_REF)
886 TREE_OPERAND (aref, 1)
887 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
888
889 if (TREE_CODE (ref) == MEM_REF
890 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
891 && integer_zerop (TREE_OPERAND (ref, 1)))
892 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
893
894 /* For canonicalization purposes we'd like to strip all outermost
895 zero-offset component-refs.
896 ??? For now simply handle zero-index array-refs. */
897 while (TREE_CODE (ref) == ARRAY_REF
898 && integer_zerop (TREE_OPERAND (ref, 1)))
899 ref = TREE_OPERAND (ref, 0);
900
901 DR_BASE_OBJECT (dr) = ref;
902 DR_ACCESS_FNS (dr) = access_fns;
903 }
904
905 /* Extracts the alias analysis information from the memory reference DR. */
906
907 static void
908 dr_analyze_alias (struct data_reference *dr)
909 {
910 tree ref = DR_REF (dr);
911 tree base = get_base_address (ref), addr;
912
913 if (INDIRECT_REF_P (base)
914 || TREE_CODE (base) == MEM_REF)
915 {
916 addr = TREE_OPERAND (base, 0);
917 if (TREE_CODE (addr) == SSA_NAME)
918 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
919 }
920 }
921
922 /* Returns true if the address of DR is invariant. */
923
924 static bool
925 dr_address_invariant_p (struct data_reference *dr)
926 {
927 unsigned i;
928 tree idx;
929
930 FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx)
931 if (tree_contains_chrecs (idx, NULL))
932 return false;
933
934 return true;
935 }
936
937 /* Frees data reference DR. */
938
939 void
940 free_data_ref (data_reference_p dr)
941 {
942 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
943 free (dr);
944 }
945
946 /* Analyzes memory reference MEMREF accessed in STMT. The reference
947 is read if IS_READ is true, write otherwise. Returns the
948 data_reference description of MEMREF. NEST is the outermost loop
949 in which the reference should be instantiated, LOOP is the loop in
950 which the data reference should be analyzed. */
951
952 struct data_reference *
953 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
954 bool is_read)
955 {
956 struct data_reference *dr;
957
958 if (dump_file && (dump_flags & TDF_DETAILS))
959 {
960 fprintf (dump_file, "Creating dr for ");
961 print_generic_expr (dump_file, memref, TDF_SLIM);
962 fprintf (dump_file, "\n");
963 }
964
965 dr = XCNEW (struct data_reference);
966 DR_STMT (dr) = stmt;
967 DR_REF (dr) = memref;
968 DR_IS_READ (dr) = is_read;
969
970 dr_analyze_innermost (dr);
971 dr_analyze_indices (dr, nest, loop);
972 dr_analyze_alias (dr);
973
974 if (dump_file && (dump_flags & TDF_DETAILS))
975 {
976 fprintf (dump_file, "\tbase_address: ");
977 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
978 fprintf (dump_file, "\n\toffset from base address: ");
979 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
980 fprintf (dump_file, "\n\tconstant offset from base address: ");
981 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
982 fprintf (dump_file, "\n\tstep: ");
983 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
984 fprintf (dump_file, "\n\taligned to: ");
985 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
986 fprintf (dump_file, "\n\tbase_object: ");
987 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
988 fprintf (dump_file, "\n");
989 }
990
991 return dr;
992 }
993
994 /* Returns true if FNA == FNB. */
995
996 static bool
997 affine_function_equal_p (affine_fn fna, affine_fn fnb)
998 {
999 unsigned i, n = VEC_length (tree, fna);
1000
1001 if (n != VEC_length (tree, fnb))
1002 return false;
1003
1004 for (i = 0; i < n; i++)
1005 if (!operand_equal_p (VEC_index (tree, fna, i),
1006 VEC_index (tree, fnb, i), 0))
1007 return false;
1008
1009 return true;
1010 }
1011
1012 /* If all the functions in CF are the same, returns one of them,
1013 otherwise returns NULL. */
1014
1015 static affine_fn
1016 common_affine_function (conflict_function *cf)
1017 {
1018 unsigned i;
1019 affine_fn comm;
1020
1021 if (!CF_NONTRIVIAL_P (cf))
1022 return NULL;
1023
1024 comm = cf->fns[0];
1025
1026 for (i = 1; i < cf->n; i++)
1027 if (!affine_function_equal_p (comm, cf->fns[i]))
1028 return NULL;
1029
1030 return comm;
1031 }
1032
1033 /* Returns the base of the affine function FN. */
1034
1035 static tree
1036 affine_function_base (affine_fn fn)
1037 {
1038 return VEC_index (tree, fn, 0);
1039 }
1040
1041 /* Returns true if FN is a constant. */
1042
1043 static bool
1044 affine_function_constant_p (affine_fn fn)
1045 {
1046 unsigned i;
1047 tree coef;
1048
1049 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1050 if (!integer_zerop (coef))
1051 return false;
1052
1053 return true;
1054 }
1055
1056 /* Returns true if FN is the zero constant function. */
1057
1058 static bool
1059 affine_function_zero_p (affine_fn fn)
1060 {
1061 return (integer_zerop (affine_function_base (fn))
1062 && affine_function_constant_p (fn));
1063 }
1064
1065 /* Returns a signed integer type with the largest precision from TA
1066 and TB. */
1067
1068 static tree
1069 signed_type_for_types (tree ta, tree tb)
1070 {
1071 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1072 return signed_type_for (ta);
1073 else
1074 return signed_type_for (tb);
1075 }
1076
1077 /* Applies operation OP on affine functions FNA and FNB, and returns the
1078 result. */
1079
1080 static affine_fn
1081 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1082 {
1083 unsigned i, n, m;
1084 affine_fn ret;
1085 tree coef;
1086
1087 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1088 {
1089 n = VEC_length (tree, fna);
1090 m = VEC_length (tree, fnb);
1091 }
1092 else
1093 {
1094 n = VEC_length (tree, fnb);
1095 m = VEC_length (tree, fna);
1096 }
1097
1098 ret = VEC_alloc (tree, heap, m);
1099 for (i = 0; i < n; i++)
1100 {
1101 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1102 TREE_TYPE (VEC_index (tree, fnb, i)));
1103
1104 VEC_quick_push (tree, ret,
1105 fold_build2 (op, type,
1106 VEC_index (tree, fna, i),
1107 VEC_index (tree, fnb, i)));
1108 }
1109
1110 for (; VEC_iterate (tree, fna, i, coef); i++)
1111 VEC_quick_push (tree, ret,
1112 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1113 coef, integer_zero_node));
1114 for (; VEC_iterate (tree, fnb, i, coef); i++)
1115 VEC_quick_push (tree, ret,
1116 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1117 integer_zero_node, coef));
1118
1119 return ret;
1120 }
1121
1122 /* Returns the sum of affine functions FNA and FNB. */
1123
1124 static affine_fn
1125 affine_fn_plus (affine_fn fna, affine_fn fnb)
1126 {
1127 return affine_fn_op (PLUS_EXPR, fna, fnb);
1128 }
1129
1130 /* Returns the difference of affine functions FNA and FNB. */
1131
1132 static affine_fn
1133 affine_fn_minus (affine_fn fna, affine_fn fnb)
1134 {
1135 return affine_fn_op (MINUS_EXPR, fna, fnb);
1136 }
1137
1138 /* Frees affine function FN. */
1139
1140 static void
1141 affine_fn_free (affine_fn fn)
1142 {
1143 VEC_free (tree, heap, fn);
1144 }
1145
1146 /* Determine for each subscript in the data dependence relation DDR
1147 the distance. */
1148
1149 static void
1150 compute_subscript_distance (struct data_dependence_relation *ddr)
1151 {
1152 conflict_function *cf_a, *cf_b;
1153 affine_fn fn_a, fn_b, diff;
1154
1155 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1156 {
1157 unsigned int i;
1158
1159 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1160 {
1161 struct subscript *subscript;
1162
1163 subscript = DDR_SUBSCRIPT (ddr, i);
1164 cf_a = SUB_CONFLICTS_IN_A (subscript);
1165 cf_b = SUB_CONFLICTS_IN_B (subscript);
1166
1167 fn_a = common_affine_function (cf_a);
1168 fn_b = common_affine_function (cf_b);
1169 if (!fn_a || !fn_b)
1170 {
1171 SUB_DISTANCE (subscript) = chrec_dont_know;
1172 return;
1173 }
1174 diff = affine_fn_minus (fn_a, fn_b);
1175
1176 if (affine_function_constant_p (diff))
1177 SUB_DISTANCE (subscript) = affine_function_base (diff);
1178 else
1179 SUB_DISTANCE (subscript) = chrec_dont_know;
1180
1181 affine_fn_free (diff);
1182 }
1183 }
1184 }
1185
1186 /* Returns the conflict function for "unknown". */
1187
1188 static conflict_function *
1189 conflict_fn_not_known (void)
1190 {
1191 conflict_function *fn = XCNEW (conflict_function);
1192 fn->n = NOT_KNOWN;
1193
1194 return fn;
1195 }
1196
1197 /* Returns the conflict function for "independent". */
1198
1199 static conflict_function *
1200 conflict_fn_no_dependence (void)
1201 {
1202 conflict_function *fn = XCNEW (conflict_function);
1203 fn->n = NO_DEPENDENCE;
1204
1205 return fn;
1206 }
1207
1208 /* Returns true if the address of OBJ is invariant in LOOP. */
1209
1210 static bool
1211 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1212 {
1213 while (handled_component_p (obj))
1214 {
1215 if (TREE_CODE (obj) == ARRAY_REF)
1216 {
1217 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1218 need to check the stride and the lower bound of the reference. */
1219 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1220 loop->num)
1221 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1222 loop->num))
1223 return false;
1224 }
1225 else if (TREE_CODE (obj) == COMPONENT_REF)
1226 {
1227 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1228 loop->num))
1229 return false;
1230 }
1231 obj = TREE_OPERAND (obj, 0);
1232 }
1233
1234 if (!INDIRECT_REF_P (obj)
1235 && TREE_CODE (obj) != MEM_REF)
1236 return true;
1237
1238 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1239 loop->num);
1240 }
1241
1242 /* Returns false if we can prove that data references A and B do not alias,
1243 true otherwise. */
1244
1245 bool
1246 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1247 {
1248 tree addr_a = DR_BASE_OBJECT (a);
1249 tree addr_b = DR_BASE_OBJECT (b);
1250
1251 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1252 return refs_output_dependent_p (addr_a, addr_b);
1253 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1254 return refs_anti_dependent_p (addr_a, addr_b);
1255 return refs_may_alias_p (addr_a, addr_b);
1256 }
1257
1258 static void compute_self_dependence (struct data_dependence_relation *);
1259
1260 /* Initialize a data dependence relation between data accesses A and
1261 B. NB_LOOPS is the number of loops surrounding the references: the
1262 size of the classic distance/direction vectors. */
1263
1264 static struct data_dependence_relation *
1265 initialize_data_dependence_relation (struct data_reference *a,
1266 struct data_reference *b,
1267 VEC (loop_p, heap) *loop_nest)
1268 {
1269 struct data_dependence_relation *res;
1270 unsigned int i;
1271
1272 res = XNEW (struct data_dependence_relation);
1273 DDR_A (res) = a;
1274 DDR_B (res) = b;
1275 DDR_LOOP_NEST (res) = NULL;
1276 DDR_REVERSED_P (res) = false;
1277 DDR_SUBSCRIPTS (res) = NULL;
1278 DDR_DIR_VECTS (res) = NULL;
1279 DDR_DIST_VECTS (res) = NULL;
1280
1281 if (a == NULL || b == NULL)
1282 {
1283 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1284 return res;
1285 }
1286
1287 /* If the data references do not alias, then they are independent. */
1288 if (!dr_may_alias_p (a, b))
1289 {
1290 DDR_ARE_DEPENDENT (res) = chrec_known;
1291 return res;
1292 }
1293
1294 /* When the references are exactly the same, don't spend time doing
1295 the data dependence tests, just initialize the ddr and return. */
1296 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1297 {
1298 DDR_AFFINE_P (res) = true;
1299 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1300 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1301 DDR_LOOP_NEST (res) = loop_nest;
1302 DDR_INNER_LOOP (res) = 0;
1303 DDR_SELF_REFERENCE (res) = true;
1304 compute_self_dependence (res);
1305 return res;
1306 }
1307
1308 /* If the references do not access the same object, we do not know
1309 whether they alias or not. */
1310 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1311 {
1312 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1313 return res;
1314 }
1315
1316 /* If the base of the object is not invariant in the loop nest, we cannot
1317 analyze it. TODO -- in fact, it would suffice to record that there may
1318 be arbitrary dependences in the loops where the base object varies. */
1319 if (loop_nest
1320 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1321 DR_BASE_OBJECT (a)))
1322 {
1323 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1324 return res;
1325 }
1326
1327 /* If the number of dimensions of the access to not agree we can have
1328 a pointer access to a component of the array element type and an
1329 array access while the base-objects are still the same. Punt. */
1330 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1331 {
1332 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1333 return res;
1334 }
1335
1336 DDR_AFFINE_P (res) = true;
1337 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1338 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1339 DDR_LOOP_NEST (res) = loop_nest;
1340 DDR_INNER_LOOP (res) = 0;
1341 DDR_SELF_REFERENCE (res) = false;
1342
1343 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1344 {
1345 struct subscript *subscript;
1346
1347 subscript = XNEW (struct subscript);
1348 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1349 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1350 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1351 SUB_DISTANCE (subscript) = chrec_dont_know;
1352 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1353 }
1354
1355 return res;
1356 }
1357
1358 /* Frees memory used by the conflict function F. */
1359
1360 static void
1361 free_conflict_function (conflict_function *f)
1362 {
1363 unsigned i;
1364
1365 if (CF_NONTRIVIAL_P (f))
1366 {
1367 for (i = 0; i < f->n; i++)
1368 affine_fn_free (f->fns[i]);
1369 }
1370 free (f);
1371 }
1372
1373 /* Frees memory used by SUBSCRIPTS. */
1374
1375 static void
1376 free_subscripts (VEC (subscript_p, heap) *subscripts)
1377 {
1378 unsigned i;
1379 subscript_p s;
1380
1381 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1382 {
1383 free_conflict_function (s->conflicting_iterations_in_a);
1384 free_conflict_function (s->conflicting_iterations_in_b);
1385 free (s);
1386 }
1387 VEC_free (subscript_p, heap, subscripts);
1388 }
1389
1390 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1391 description. */
1392
1393 static inline void
1394 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1395 tree chrec)
1396 {
1397 if (dump_file && (dump_flags & TDF_DETAILS))
1398 {
1399 fprintf (dump_file, "(dependence classified: ");
1400 print_generic_expr (dump_file, chrec, 0);
1401 fprintf (dump_file, ")\n");
1402 }
1403
1404 DDR_ARE_DEPENDENT (ddr) = chrec;
1405 free_subscripts (DDR_SUBSCRIPTS (ddr));
1406 DDR_SUBSCRIPTS (ddr) = NULL;
1407 }
1408
1409 /* The dependence relation DDR cannot be represented by a distance
1410 vector. */
1411
1412 static inline void
1413 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1414 {
1415 if (dump_file && (dump_flags & TDF_DETAILS))
1416 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1417
1418 DDR_AFFINE_P (ddr) = false;
1419 }
1420
1421 \f
1422
1423 /* This section contains the classic Banerjee tests. */
1424
1425 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1426 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1427
1428 static inline bool
1429 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1430 {
1431 return (evolution_function_is_constant_p (chrec_a)
1432 && evolution_function_is_constant_p (chrec_b));
1433 }
1434
1435 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1436 variable, i.e., if the SIV (Single Index Variable) test is true. */
1437
1438 static bool
1439 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1440 {
1441 if ((evolution_function_is_constant_p (chrec_a)
1442 && evolution_function_is_univariate_p (chrec_b))
1443 || (evolution_function_is_constant_p (chrec_b)
1444 && evolution_function_is_univariate_p (chrec_a)))
1445 return true;
1446
1447 if (evolution_function_is_univariate_p (chrec_a)
1448 && evolution_function_is_univariate_p (chrec_b))
1449 {
1450 switch (TREE_CODE (chrec_a))
1451 {
1452 case POLYNOMIAL_CHREC:
1453 switch (TREE_CODE (chrec_b))
1454 {
1455 case POLYNOMIAL_CHREC:
1456 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1457 return false;
1458
1459 default:
1460 return true;
1461 }
1462
1463 default:
1464 return true;
1465 }
1466 }
1467
1468 return false;
1469 }
1470
1471 /* Creates a conflict function with N dimensions. The affine functions
1472 in each dimension follow. */
1473
1474 static conflict_function *
1475 conflict_fn (unsigned n, ...)
1476 {
1477 unsigned i;
1478 conflict_function *ret = XCNEW (conflict_function);
1479 va_list ap;
1480
1481 gcc_assert (0 < n && n <= MAX_DIM);
1482 va_start(ap, n);
1483
1484 ret->n = n;
1485 for (i = 0; i < n; i++)
1486 ret->fns[i] = va_arg (ap, affine_fn);
1487 va_end(ap);
1488
1489 return ret;
1490 }
1491
1492 /* Returns constant affine function with value CST. */
1493
1494 static affine_fn
1495 affine_fn_cst (tree cst)
1496 {
1497 affine_fn fn = VEC_alloc (tree, heap, 1);
1498 VEC_quick_push (tree, fn, cst);
1499 return fn;
1500 }
1501
1502 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1503
1504 static affine_fn
1505 affine_fn_univar (tree cst, unsigned dim, tree coef)
1506 {
1507 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1508 unsigned i;
1509
1510 gcc_assert (dim > 0);
1511 VEC_quick_push (tree, fn, cst);
1512 for (i = 1; i < dim; i++)
1513 VEC_quick_push (tree, fn, integer_zero_node);
1514 VEC_quick_push (tree, fn, coef);
1515 return fn;
1516 }
1517
1518 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1519 *OVERLAPS_B are initialized to the functions that describe the
1520 relation between the elements accessed twice by CHREC_A and
1521 CHREC_B. For k >= 0, the following property is verified:
1522
1523 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1524
1525 static void
1526 analyze_ziv_subscript (tree chrec_a,
1527 tree chrec_b,
1528 conflict_function **overlaps_a,
1529 conflict_function **overlaps_b,
1530 tree *last_conflicts)
1531 {
1532 tree type, difference;
1533 dependence_stats.num_ziv++;
1534
1535 if (dump_file && (dump_flags & TDF_DETAILS))
1536 fprintf (dump_file, "(analyze_ziv_subscript \n");
1537
1538 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1539 chrec_a = chrec_convert (type, chrec_a, NULL);
1540 chrec_b = chrec_convert (type, chrec_b, NULL);
1541 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1542
1543 switch (TREE_CODE (difference))
1544 {
1545 case INTEGER_CST:
1546 if (integer_zerop (difference))
1547 {
1548 /* The difference is equal to zero: the accessed index
1549 overlaps for each iteration in the loop. */
1550 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1551 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1552 *last_conflicts = chrec_dont_know;
1553 dependence_stats.num_ziv_dependent++;
1554 }
1555 else
1556 {
1557 /* The accesses do not overlap. */
1558 *overlaps_a = conflict_fn_no_dependence ();
1559 *overlaps_b = conflict_fn_no_dependence ();
1560 *last_conflicts = integer_zero_node;
1561 dependence_stats.num_ziv_independent++;
1562 }
1563 break;
1564
1565 default:
1566 /* We're not sure whether the indexes overlap. For the moment,
1567 conservatively answer "don't know". */
1568 if (dump_file && (dump_flags & TDF_DETAILS))
1569 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1570
1571 *overlaps_a = conflict_fn_not_known ();
1572 *overlaps_b = conflict_fn_not_known ();
1573 *last_conflicts = chrec_dont_know;
1574 dependence_stats.num_ziv_unimplemented++;
1575 break;
1576 }
1577
1578 if (dump_file && (dump_flags & TDF_DETAILS))
1579 fprintf (dump_file, ")\n");
1580 }
1581
1582 /* Sets NIT to the estimated number of executions of the statements in
1583 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1584 large as the number of iterations. If we have no reliable estimate,
1585 the function returns false, otherwise returns true. */
1586
1587 bool
1588 estimated_loop_iterations (struct loop *loop, bool conservative,
1589 double_int *nit)
1590 {
1591 estimate_numbers_of_iterations_loop (loop, true);
1592 if (conservative)
1593 {
1594 if (!loop->any_upper_bound)
1595 return false;
1596
1597 *nit = loop->nb_iterations_upper_bound;
1598 }
1599 else
1600 {
1601 if (!loop->any_estimate)
1602 return false;
1603
1604 *nit = loop->nb_iterations_estimate;
1605 }
1606
1607 return true;
1608 }
1609
1610 /* Similar to estimated_loop_iterations, but returns the estimate only
1611 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1612 on the number of iterations of LOOP could not be derived, returns -1. */
1613
1614 HOST_WIDE_INT
1615 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1616 {
1617 double_int nit;
1618 HOST_WIDE_INT hwi_nit;
1619
1620 if (!estimated_loop_iterations (loop, conservative, &nit))
1621 return -1;
1622
1623 if (!double_int_fits_in_shwi_p (nit))
1624 return -1;
1625 hwi_nit = double_int_to_shwi (nit);
1626
1627 return hwi_nit < 0 ? -1 : hwi_nit;
1628 }
1629
1630 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1631 and only if it fits to the int type. If this is not the case, or the
1632 estimate on the number of iterations of LOOP could not be derived, returns
1633 chrec_dont_know. */
1634
1635 static tree
1636 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1637 {
1638 double_int nit;
1639 tree type;
1640
1641 if (!estimated_loop_iterations (loop, conservative, &nit))
1642 return chrec_dont_know;
1643
1644 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1645 if (!double_int_fits_to_tree_p (type, nit))
1646 return chrec_dont_know;
1647
1648 return double_int_to_tree (type, nit);
1649 }
1650
1651 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1652 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1653 *OVERLAPS_B are initialized to the functions that describe the
1654 relation between the elements accessed twice by CHREC_A and
1655 CHREC_B. For k >= 0, the following property is verified:
1656
1657 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1658
1659 static void
1660 analyze_siv_subscript_cst_affine (tree chrec_a,
1661 tree chrec_b,
1662 conflict_function **overlaps_a,
1663 conflict_function **overlaps_b,
1664 tree *last_conflicts)
1665 {
1666 bool value0, value1, value2;
1667 tree type, difference, tmp;
1668
1669 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1670 chrec_a = chrec_convert (type, chrec_a, NULL);
1671 chrec_b = chrec_convert (type, chrec_b, NULL);
1672 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1673
1674 if (!chrec_is_positive (initial_condition (difference), &value0))
1675 {
1676 if (dump_file && (dump_flags & TDF_DETAILS))
1677 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1678
1679 dependence_stats.num_siv_unimplemented++;
1680 *overlaps_a = conflict_fn_not_known ();
1681 *overlaps_b = conflict_fn_not_known ();
1682 *last_conflicts = chrec_dont_know;
1683 return;
1684 }
1685 else
1686 {
1687 if (value0 == false)
1688 {
1689 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1690 {
1691 if (dump_file && (dump_flags & TDF_DETAILS))
1692 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1693
1694 *overlaps_a = conflict_fn_not_known ();
1695 *overlaps_b = conflict_fn_not_known ();
1696 *last_conflicts = chrec_dont_know;
1697 dependence_stats.num_siv_unimplemented++;
1698 return;
1699 }
1700 else
1701 {
1702 if (value1 == true)
1703 {
1704 /* Example:
1705 chrec_a = 12
1706 chrec_b = {10, +, 1}
1707 */
1708
1709 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1710 {
1711 HOST_WIDE_INT numiter;
1712 struct loop *loop = get_chrec_loop (chrec_b);
1713
1714 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1715 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1716 fold_build1 (ABS_EXPR, type, difference),
1717 CHREC_RIGHT (chrec_b));
1718 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1719 *last_conflicts = integer_one_node;
1720
1721
1722 /* Perform weak-zero siv test to see if overlap is
1723 outside the loop bounds. */
1724 numiter = estimated_loop_iterations_int (loop, false);
1725
1726 if (numiter >= 0
1727 && compare_tree_int (tmp, numiter) > 0)
1728 {
1729 free_conflict_function (*overlaps_a);
1730 free_conflict_function (*overlaps_b);
1731 *overlaps_a = conflict_fn_no_dependence ();
1732 *overlaps_b = conflict_fn_no_dependence ();
1733 *last_conflicts = integer_zero_node;
1734 dependence_stats.num_siv_independent++;
1735 return;
1736 }
1737 dependence_stats.num_siv_dependent++;
1738 return;
1739 }
1740
1741 /* When the step does not divide the difference, there are
1742 no overlaps. */
1743 else
1744 {
1745 *overlaps_a = conflict_fn_no_dependence ();
1746 *overlaps_b = conflict_fn_no_dependence ();
1747 *last_conflicts = integer_zero_node;
1748 dependence_stats.num_siv_independent++;
1749 return;
1750 }
1751 }
1752
1753 else
1754 {
1755 /* Example:
1756 chrec_a = 12
1757 chrec_b = {10, +, -1}
1758
1759 In this case, chrec_a will not overlap with chrec_b. */
1760 *overlaps_a = conflict_fn_no_dependence ();
1761 *overlaps_b = conflict_fn_no_dependence ();
1762 *last_conflicts = integer_zero_node;
1763 dependence_stats.num_siv_independent++;
1764 return;
1765 }
1766 }
1767 }
1768 else
1769 {
1770 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1771 {
1772 if (dump_file && (dump_flags & TDF_DETAILS))
1773 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1774
1775 *overlaps_a = conflict_fn_not_known ();
1776 *overlaps_b = conflict_fn_not_known ();
1777 *last_conflicts = chrec_dont_know;
1778 dependence_stats.num_siv_unimplemented++;
1779 return;
1780 }
1781 else
1782 {
1783 if (value2 == false)
1784 {
1785 /* Example:
1786 chrec_a = 3
1787 chrec_b = {10, +, -1}
1788 */
1789 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1790 {
1791 HOST_WIDE_INT numiter;
1792 struct loop *loop = get_chrec_loop (chrec_b);
1793
1794 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1795 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1796 CHREC_RIGHT (chrec_b));
1797 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1798 *last_conflicts = integer_one_node;
1799
1800 /* Perform weak-zero siv test to see if overlap is
1801 outside the loop bounds. */
1802 numiter = estimated_loop_iterations_int (loop, false);
1803
1804 if (numiter >= 0
1805 && compare_tree_int (tmp, numiter) > 0)
1806 {
1807 free_conflict_function (*overlaps_a);
1808 free_conflict_function (*overlaps_b);
1809 *overlaps_a = conflict_fn_no_dependence ();
1810 *overlaps_b = conflict_fn_no_dependence ();
1811 *last_conflicts = integer_zero_node;
1812 dependence_stats.num_siv_independent++;
1813 return;
1814 }
1815 dependence_stats.num_siv_dependent++;
1816 return;
1817 }
1818
1819 /* When the step does not divide the difference, there
1820 are no overlaps. */
1821 else
1822 {
1823 *overlaps_a = conflict_fn_no_dependence ();
1824 *overlaps_b = conflict_fn_no_dependence ();
1825 *last_conflicts = integer_zero_node;
1826 dependence_stats.num_siv_independent++;
1827 return;
1828 }
1829 }
1830 else
1831 {
1832 /* Example:
1833 chrec_a = 3
1834 chrec_b = {4, +, 1}
1835
1836 In this case, chrec_a will not overlap with chrec_b. */
1837 *overlaps_a = conflict_fn_no_dependence ();
1838 *overlaps_b = conflict_fn_no_dependence ();
1839 *last_conflicts = integer_zero_node;
1840 dependence_stats.num_siv_independent++;
1841 return;
1842 }
1843 }
1844 }
1845 }
1846 }
1847
1848 /* Helper recursive function for initializing the matrix A. Returns
1849 the initial value of CHREC. */
1850
1851 static tree
1852 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1853 {
1854 gcc_assert (chrec);
1855
1856 switch (TREE_CODE (chrec))
1857 {
1858 case POLYNOMIAL_CHREC:
1859 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1860
1861 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1862 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1863
1864 case PLUS_EXPR:
1865 case MULT_EXPR:
1866 case MINUS_EXPR:
1867 {
1868 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1869 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1870
1871 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1872 }
1873
1874 case NOP_EXPR:
1875 {
1876 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1877 return chrec_convert (chrec_type (chrec), op, NULL);
1878 }
1879
1880 case BIT_NOT_EXPR:
1881 {
1882 /* Handle ~X as -1 - X. */
1883 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1884 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1885 build_int_cst (TREE_TYPE (chrec), -1), op);
1886 }
1887
1888 case INTEGER_CST:
1889 return chrec;
1890
1891 default:
1892 gcc_unreachable ();
1893 return NULL_TREE;
1894 }
1895 }
1896
1897 #define FLOOR_DIV(x,y) ((x) / (y))
1898
1899 /* Solves the special case of the Diophantine equation:
1900 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1901
1902 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1903 number of iterations that loops X and Y run. The overlaps will be
1904 constructed as evolutions in dimension DIM. */
1905
1906 static void
1907 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1908 affine_fn *overlaps_a,
1909 affine_fn *overlaps_b,
1910 tree *last_conflicts, int dim)
1911 {
1912 if (((step_a > 0 && step_b > 0)
1913 || (step_a < 0 && step_b < 0)))
1914 {
1915 int step_overlaps_a, step_overlaps_b;
1916 int gcd_steps_a_b, last_conflict, tau2;
1917
1918 gcd_steps_a_b = gcd (step_a, step_b);
1919 step_overlaps_a = step_b / gcd_steps_a_b;
1920 step_overlaps_b = step_a / gcd_steps_a_b;
1921
1922 if (niter > 0)
1923 {
1924 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1925 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1926 last_conflict = tau2;
1927 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1928 }
1929 else
1930 *last_conflicts = chrec_dont_know;
1931
1932 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1933 build_int_cst (NULL_TREE,
1934 step_overlaps_a));
1935 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1936 build_int_cst (NULL_TREE,
1937 step_overlaps_b));
1938 }
1939
1940 else
1941 {
1942 *overlaps_a = affine_fn_cst (integer_zero_node);
1943 *overlaps_b = affine_fn_cst (integer_zero_node);
1944 *last_conflicts = integer_zero_node;
1945 }
1946 }
1947
1948 /* Solves the special case of a Diophantine equation where CHREC_A is
1949 an affine bivariate function, and CHREC_B is an affine univariate
1950 function. For example,
1951
1952 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1953
1954 has the following overlapping functions:
1955
1956 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1957 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1958 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1959
1960 FORNOW: This is a specialized implementation for a case occurring in
1961 a common benchmark. Implement the general algorithm. */
1962
1963 static void
1964 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1965 conflict_function **overlaps_a,
1966 conflict_function **overlaps_b,
1967 tree *last_conflicts)
1968 {
1969 bool xz_p, yz_p, xyz_p;
1970 int step_x, step_y, step_z;
1971 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1972 affine_fn overlaps_a_xz, overlaps_b_xz;
1973 affine_fn overlaps_a_yz, overlaps_b_yz;
1974 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1975 affine_fn ova1, ova2, ovb;
1976 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1977
1978 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1979 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1980 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1981
1982 niter_x =
1983 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1984 false);
1985 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1986 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1987
1988 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1989 {
1990 if (dump_file && (dump_flags & TDF_DETAILS))
1991 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1992
1993 *overlaps_a = conflict_fn_not_known ();
1994 *overlaps_b = conflict_fn_not_known ();
1995 *last_conflicts = chrec_dont_know;
1996 return;
1997 }
1998
1999 niter = MIN (niter_x, niter_z);
2000 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2001 &overlaps_a_xz,
2002 &overlaps_b_xz,
2003 &last_conflicts_xz, 1);
2004 niter = MIN (niter_y, niter_z);
2005 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2006 &overlaps_a_yz,
2007 &overlaps_b_yz,
2008 &last_conflicts_yz, 2);
2009 niter = MIN (niter_x, niter_z);
2010 niter = MIN (niter_y, niter);
2011 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2012 &overlaps_a_xyz,
2013 &overlaps_b_xyz,
2014 &last_conflicts_xyz, 3);
2015
2016 xz_p = !integer_zerop (last_conflicts_xz);
2017 yz_p = !integer_zerop (last_conflicts_yz);
2018 xyz_p = !integer_zerop (last_conflicts_xyz);
2019
2020 if (xz_p || yz_p || xyz_p)
2021 {
2022 ova1 = affine_fn_cst (integer_zero_node);
2023 ova2 = affine_fn_cst (integer_zero_node);
2024 ovb = affine_fn_cst (integer_zero_node);
2025 if (xz_p)
2026 {
2027 affine_fn t0 = ova1;
2028 affine_fn t2 = ovb;
2029
2030 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2031 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2032 affine_fn_free (t0);
2033 affine_fn_free (t2);
2034 *last_conflicts = last_conflicts_xz;
2035 }
2036 if (yz_p)
2037 {
2038 affine_fn t0 = ova2;
2039 affine_fn t2 = ovb;
2040
2041 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2042 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2043 affine_fn_free (t0);
2044 affine_fn_free (t2);
2045 *last_conflicts = last_conflicts_yz;
2046 }
2047 if (xyz_p)
2048 {
2049 affine_fn t0 = ova1;
2050 affine_fn t2 = ova2;
2051 affine_fn t4 = ovb;
2052
2053 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2054 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2055 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2056 affine_fn_free (t0);
2057 affine_fn_free (t2);
2058 affine_fn_free (t4);
2059 *last_conflicts = last_conflicts_xyz;
2060 }
2061 *overlaps_a = conflict_fn (2, ova1, ova2);
2062 *overlaps_b = conflict_fn (1, ovb);
2063 }
2064 else
2065 {
2066 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2067 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2068 *last_conflicts = integer_zero_node;
2069 }
2070
2071 affine_fn_free (overlaps_a_xz);
2072 affine_fn_free (overlaps_b_xz);
2073 affine_fn_free (overlaps_a_yz);
2074 affine_fn_free (overlaps_b_yz);
2075 affine_fn_free (overlaps_a_xyz);
2076 affine_fn_free (overlaps_b_xyz);
2077 }
2078
2079 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2080
2081 static void
2082 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2083 int size)
2084 {
2085 memcpy (vec2, vec1, size * sizeof (*vec1));
2086 }
2087
2088 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2089
2090 static void
2091 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2092 int m, int n)
2093 {
2094 int i;
2095
2096 for (i = 0; i < m; i++)
2097 lambda_vector_copy (mat1[i], mat2[i], n);
2098 }
2099
2100 /* Store the N x N identity matrix in MAT. */
2101
2102 static void
2103 lambda_matrix_id (lambda_matrix mat, int size)
2104 {
2105 int i, j;
2106
2107 for (i = 0; i < size; i++)
2108 for (j = 0; j < size; j++)
2109 mat[i][j] = (i == j) ? 1 : 0;
2110 }
2111
2112 /* Return the first nonzero element of vector VEC1 between START and N.
2113 We must have START <= N. Returns N if VEC1 is the zero vector. */
2114
2115 static int
2116 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2117 {
2118 int j = start;
2119 while (j < n && vec1[j] == 0)
2120 j++;
2121 return j;
2122 }
2123
2124 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2125 R2 = R2 + CONST1 * R1. */
2126
2127 static void
2128 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2129 {
2130 int i;
2131
2132 if (const1 == 0)
2133 return;
2134
2135 for (i = 0; i < n; i++)
2136 mat[r2][i] += const1 * mat[r1][i];
2137 }
2138
2139 /* Swap rows R1 and R2 in matrix MAT. */
2140
2141 static void
2142 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2143 {
2144 lambda_vector row;
2145
2146 row = mat[r1];
2147 mat[r1] = mat[r2];
2148 mat[r2] = row;
2149 }
2150
2151 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2152 and store the result in VEC2. */
2153
2154 static void
2155 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2156 int size, int const1)
2157 {
2158 int i;
2159
2160 if (const1 == 0)
2161 lambda_vector_clear (vec2, size);
2162 else
2163 for (i = 0; i < size; i++)
2164 vec2[i] = const1 * vec1[i];
2165 }
2166
2167 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2168
2169 static void
2170 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2171 int size)
2172 {
2173 lambda_vector_mult_const (vec1, vec2, size, -1);
2174 }
2175
2176 /* Negate row R1 of matrix MAT which has N columns. */
2177
2178 static void
2179 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2180 {
2181 lambda_vector_negate (mat[r1], mat[r1], n);
2182 }
2183
2184 /* Return true if two vectors are equal. */
2185
2186 static bool
2187 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2188 {
2189 int i;
2190 for (i = 0; i < size; i++)
2191 if (vec1[i] != vec2[i])
2192 return false;
2193 return true;
2194 }
2195
2196 /* Given an M x N integer matrix A, this function determines an M x
2197 M unimodular matrix U, and an M x N echelon matrix S such that
2198 "U.A = S". This decomposition is also known as "right Hermite".
2199
2200 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2201 Restructuring Compilers" Utpal Banerjee. */
2202
2203 static void
2204 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2205 lambda_matrix S, lambda_matrix U)
2206 {
2207 int i, j, i0 = 0;
2208
2209 lambda_matrix_copy (A, S, m, n);
2210 lambda_matrix_id (U, m);
2211
2212 for (j = 0; j < n; j++)
2213 {
2214 if (lambda_vector_first_nz (S[j], m, i0) < m)
2215 {
2216 ++i0;
2217 for (i = m - 1; i >= i0; i--)
2218 {
2219 while (S[i][j] != 0)
2220 {
2221 int sigma, factor, a, b;
2222
2223 a = S[i-1][j];
2224 b = S[i][j];
2225 sigma = (a * b < 0) ? -1: 1;
2226 a = abs (a);
2227 b = abs (b);
2228 factor = sigma * (a / b);
2229
2230 lambda_matrix_row_add (S, n, i, i-1, -factor);
2231 lambda_matrix_row_exchange (S, i, i-1);
2232
2233 lambda_matrix_row_add (U, m, i, i-1, -factor);
2234 lambda_matrix_row_exchange (U, i, i-1);
2235 }
2236 }
2237 }
2238 }
2239 }
2240
2241 /* Determines the overlapping elements due to accesses CHREC_A and
2242 CHREC_B, that are affine functions. This function cannot handle
2243 symbolic evolution functions, ie. when initial conditions are
2244 parameters, because it uses lambda matrices of integers. */
2245
2246 static void
2247 analyze_subscript_affine_affine (tree chrec_a,
2248 tree chrec_b,
2249 conflict_function **overlaps_a,
2250 conflict_function **overlaps_b,
2251 tree *last_conflicts)
2252 {
2253 unsigned nb_vars_a, nb_vars_b, dim;
2254 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2255 lambda_matrix A, U, S;
2256 struct obstack scratch_obstack;
2257
2258 if (eq_evolutions_p (chrec_a, chrec_b))
2259 {
2260 /* The accessed index overlaps for each iteration in the
2261 loop. */
2262 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2263 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2264 *last_conflicts = chrec_dont_know;
2265 return;
2266 }
2267 if (dump_file && (dump_flags & TDF_DETAILS))
2268 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2269
2270 /* For determining the initial intersection, we have to solve a
2271 Diophantine equation. This is the most time consuming part.
2272
2273 For answering to the question: "Is there a dependence?" we have
2274 to prove that there exists a solution to the Diophantine
2275 equation, and that the solution is in the iteration domain,
2276 i.e. the solution is positive or zero, and that the solution
2277 happens before the upper bound loop.nb_iterations. Otherwise
2278 there is no dependence. This function outputs a description of
2279 the iterations that hold the intersections. */
2280
2281 nb_vars_a = nb_vars_in_chrec (chrec_a);
2282 nb_vars_b = nb_vars_in_chrec (chrec_b);
2283
2284 gcc_obstack_init (&scratch_obstack);
2285
2286 dim = nb_vars_a + nb_vars_b;
2287 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2288 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2289 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2290
2291 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2292 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2293 gamma = init_b - init_a;
2294
2295 /* Don't do all the hard work of solving the Diophantine equation
2296 when we already know the solution: for example,
2297 | {3, +, 1}_1
2298 | {3, +, 4}_2
2299 | gamma = 3 - 3 = 0.
2300 Then the first overlap occurs during the first iterations:
2301 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2302 */
2303 if (gamma == 0)
2304 {
2305 if (nb_vars_a == 1 && nb_vars_b == 1)
2306 {
2307 HOST_WIDE_INT step_a, step_b;
2308 HOST_WIDE_INT niter, niter_a, niter_b;
2309 affine_fn ova, ovb;
2310
2311 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2312 false);
2313 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2314 false);
2315 niter = MIN (niter_a, niter_b);
2316 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2317 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2318
2319 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2320 &ova, &ovb,
2321 last_conflicts, 1);
2322 *overlaps_a = conflict_fn (1, ova);
2323 *overlaps_b = conflict_fn (1, ovb);
2324 }
2325
2326 else if (nb_vars_a == 2 && nb_vars_b == 1)
2327 compute_overlap_steps_for_affine_1_2
2328 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2329
2330 else if (nb_vars_a == 1 && nb_vars_b == 2)
2331 compute_overlap_steps_for_affine_1_2
2332 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2333
2334 else
2335 {
2336 if (dump_file && (dump_flags & TDF_DETAILS))
2337 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2338 *overlaps_a = conflict_fn_not_known ();
2339 *overlaps_b = conflict_fn_not_known ();
2340 *last_conflicts = chrec_dont_know;
2341 }
2342 goto end_analyze_subs_aa;
2343 }
2344
2345 /* U.A = S */
2346 lambda_matrix_right_hermite (A, dim, 1, S, U);
2347
2348 if (S[0][0] < 0)
2349 {
2350 S[0][0] *= -1;
2351 lambda_matrix_row_negate (U, dim, 0);
2352 }
2353 gcd_alpha_beta = S[0][0];
2354
2355 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2356 but that is a quite strange case. Instead of ICEing, answer
2357 don't know. */
2358 if (gcd_alpha_beta == 0)
2359 {
2360 *overlaps_a = conflict_fn_not_known ();
2361 *overlaps_b = conflict_fn_not_known ();
2362 *last_conflicts = chrec_dont_know;
2363 goto end_analyze_subs_aa;
2364 }
2365
2366 /* The classic "gcd-test". */
2367 if (!int_divides_p (gcd_alpha_beta, gamma))
2368 {
2369 /* The "gcd-test" has determined that there is no integer
2370 solution, i.e. there is no dependence. */
2371 *overlaps_a = conflict_fn_no_dependence ();
2372 *overlaps_b = conflict_fn_no_dependence ();
2373 *last_conflicts = integer_zero_node;
2374 }
2375
2376 /* Both access functions are univariate. This includes SIV and MIV cases. */
2377 else if (nb_vars_a == 1 && nb_vars_b == 1)
2378 {
2379 /* Both functions should have the same evolution sign. */
2380 if (((A[0][0] > 0 && -A[1][0] > 0)
2381 || (A[0][0] < 0 && -A[1][0] < 0)))
2382 {
2383 /* The solutions are given by:
2384 |
2385 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2386 | [u21 u22] [y0]
2387
2388 For a given integer t. Using the following variables,
2389
2390 | i0 = u11 * gamma / gcd_alpha_beta
2391 | j0 = u12 * gamma / gcd_alpha_beta
2392 | i1 = u21
2393 | j1 = u22
2394
2395 the solutions are:
2396
2397 | x0 = i0 + i1 * t,
2398 | y0 = j0 + j1 * t. */
2399 HOST_WIDE_INT i0, j0, i1, j1;
2400
2401 i0 = U[0][0] * gamma / gcd_alpha_beta;
2402 j0 = U[0][1] * gamma / gcd_alpha_beta;
2403 i1 = U[1][0];
2404 j1 = U[1][1];
2405
2406 if ((i1 == 0 && i0 < 0)
2407 || (j1 == 0 && j0 < 0))
2408 {
2409 /* There is no solution.
2410 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2411 falls in here, but for the moment we don't look at the
2412 upper bound of the iteration domain. */
2413 *overlaps_a = conflict_fn_no_dependence ();
2414 *overlaps_b = conflict_fn_no_dependence ();
2415 *last_conflicts = integer_zero_node;
2416 goto end_analyze_subs_aa;
2417 }
2418
2419 if (i1 > 0 && j1 > 0)
2420 {
2421 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2422 (get_chrec_loop (chrec_a), false);
2423 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2424 (get_chrec_loop (chrec_b), false);
2425 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2426
2427 /* (X0, Y0) is a solution of the Diophantine equation:
2428 "chrec_a (X0) = chrec_b (Y0)". */
2429 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2430 CEIL (-j0, j1));
2431 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2432 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2433
2434 /* (X1, Y1) is the smallest positive solution of the eq
2435 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2436 first conflict occurs. */
2437 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2438 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2439 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2440
2441 if (niter > 0)
2442 {
2443 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2444 FLOOR_DIV (niter - j0, j1));
2445 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2446
2447 /* If the overlap occurs outside of the bounds of the
2448 loop, there is no dependence. */
2449 if (x1 >= niter || y1 >= niter)
2450 {
2451 *overlaps_a = conflict_fn_no_dependence ();
2452 *overlaps_b = conflict_fn_no_dependence ();
2453 *last_conflicts = integer_zero_node;
2454 goto end_analyze_subs_aa;
2455 }
2456 else
2457 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2458 }
2459 else
2460 *last_conflicts = chrec_dont_know;
2461
2462 *overlaps_a
2463 = conflict_fn (1,
2464 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2465 1,
2466 build_int_cst (NULL_TREE, i1)));
2467 *overlaps_b
2468 = conflict_fn (1,
2469 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2470 1,
2471 build_int_cst (NULL_TREE, j1)));
2472 }
2473 else
2474 {
2475 /* FIXME: For the moment, the upper bound of the
2476 iteration domain for i and j is not checked. */
2477 if (dump_file && (dump_flags & TDF_DETAILS))
2478 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2479 *overlaps_a = conflict_fn_not_known ();
2480 *overlaps_b = conflict_fn_not_known ();
2481 *last_conflicts = chrec_dont_know;
2482 }
2483 }
2484 else
2485 {
2486 if (dump_file && (dump_flags & TDF_DETAILS))
2487 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2488 *overlaps_a = conflict_fn_not_known ();
2489 *overlaps_b = conflict_fn_not_known ();
2490 *last_conflicts = chrec_dont_know;
2491 }
2492 }
2493 else
2494 {
2495 if (dump_file && (dump_flags & TDF_DETAILS))
2496 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2497 *overlaps_a = conflict_fn_not_known ();
2498 *overlaps_b = conflict_fn_not_known ();
2499 *last_conflicts = chrec_dont_know;
2500 }
2501
2502 end_analyze_subs_aa:
2503 obstack_free (&scratch_obstack, NULL);
2504 if (dump_file && (dump_flags & TDF_DETAILS))
2505 {
2506 fprintf (dump_file, " (overlaps_a = ");
2507 dump_conflict_function (dump_file, *overlaps_a);
2508 fprintf (dump_file, ")\n (overlaps_b = ");
2509 dump_conflict_function (dump_file, *overlaps_b);
2510 fprintf (dump_file, ")\n");
2511 fprintf (dump_file, ")\n");
2512 }
2513 }
2514
2515 /* Returns true when analyze_subscript_affine_affine can be used for
2516 determining the dependence relation between chrec_a and chrec_b,
2517 that contain symbols. This function modifies chrec_a and chrec_b
2518 such that the analysis result is the same, and such that they don't
2519 contain symbols, and then can safely be passed to the analyzer.
2520
2521 Example: The analysis of the following tuples of evolutions produce
2522 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2523 vs. {0, +, 1}_1
2524
2525 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2526 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2527 */
2528
2529 static bool
2530 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2531 {
2532 tree diff, type, left_a, left_b, right_b;
2533
2534 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2535 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2536 /* FIXME: For the moment not handled. Might be refined later. */
2537 return false;
2538
2539 type = chrec_type (*chrec_a);
2540 left_a = CHREC_LEFT (*chrec_a);
2541 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2542 diff = chrec_fold_minus (type, left_a, left_b);
2543
2544 if (!evolution_function_is_constant_p (diff))
2545 return false;
2546
2547 if (dump_file && (dump_flags & TDF_DETAILS))
2548 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2549
2550 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2551 diff, CHREC_RIGHT (*chrec_a));
2552 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2553 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2554 build_int_cst (type, 0),
2555 right_b);
2556 return true;
2557 }
2558
2559 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2560 *OVERLAPS_B are initialized to the functions that describe the
2561 relation between the elements accessed twice by CHREC_A and
2562 CHREC_B. For k >= 0, the following property is verified:
2563
2564 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2565
2566 static void
2567 analyze_siv_subscript (tree chrec_a,
2568 tree chrec_b,
2569 conflict_function **overlaps_a,
2570 conflict_function **overlaps_b,
2571 tree *last_conflicts,
2572 int loop_nest_num)
2573 {
2574 dependence_stats.num_siv++;
2575
2576 if (dump_file && (dump_flags & TDF_DETAILS))
2577 fprintf (dump_file, "(analyze_siv_subscript \n");
2578
2579 if (evolution_function_is_constant_p (chrec_a)
2580 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2581 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2582 overlaps_a, overlaps_b, last_conflicts);
2583
2584 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2585 && evolution_function_is_constant_p (chrec_b))
2586 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2587 overlaps_b, overlaps_a, last_conflicts);
2588
2589 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2590 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2591 {
2592 if (!chrec_contains_symbols (chrec_a)
2593 && !chrec_contains_symbols (chrec_b))
2594 {
2595 analyze_subscript_affine_affine (chrec_a, chrec_b,
2596 overlaps_a, overlaps_b,
2597 last_conflicts);
2598
2599 if (CF_NOT_KNOWN_P (*overlaps_a)
2600 || CF_NOT_KNOWN_P (*overlaps_b))
2601 dependence_stats.num_siv_unimplemented++;
2602 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2603 || CF_NO_DEPENDENCE_P (*overlaps_b))
2604 dependence_stats.num_siv_independent++;
2605 else
2606 dependence_stats.num_siv_dependent++;
2607 }
2608 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2609 &chrec_b))
2610 {
2611 analyze_subscript_affine_affine (chrec_a, chrec_b,
2612 overlaps_a, overlaps_b,
2613 last_conflicts);
2614
2615 if (CF_NOT_KNOWN_P (*overlaps_a)
2616 || CF_NOT_KNOWN_P (*overlaps_b))
2617 dependence_stats.num_siv_unimplemented++;
2618 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2619 || CF_NO_DEPENDENCE_P (*overlaps_b))
2620 dependence_stats.num_siv_independent++;
2621 else
2622 dependence_stats.num_siv_dependent++;
2623 }
2624 else
2625 goto siv_subscript_dontknow;
2626 }
2627
2628 else
2629 {
2630 siv_subscript_dontknow:;
2631 if (dump_file && (dump_flags & TDF_DETAILS))
2632 fprintf (dump_file, "siv test failed: unimplemented.\n");
2633 *overlaps_a = conflict_fn_not_known ();
2634 *overlaps_b = conflict_fn_not_known ();
2635 *last_conflicts = chrec_dont_know;
2636 dependence_stats.num_siv_unimplemented++;
2637 }
2638
2639 if (dump_file && (dump_flags & TDF_DETAILS))
2640 fprintf (dump_file, ")\n");
2641 }
2642
2643 /* Returns false if we can prove that the greatest common divisor of the steps
2644 of CHREC does not divide CST, false otherwise. */
2645
2646 static bool
2647 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2648 {
2649 HOST_WIDE_INT cd = 0, val;
2650 tree step;
2651
2652 if (!host_integerp (cst, 0))
2653 return true;
2654 val = tree_low_cst (cst, 0);
2655
2656 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2657 {
2658 step = CHREC_RIGHT (chrec);
2659 if (!host_integerp (step, 0))
2660 return true;
2661 cd = gcd (cd, tree_low_cst (step, 0));
2662 chrec = CHREC_LEFT (chrec);
2663 }
2664
2665 return val % cd == 0;
2666 }
2667
2668 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2669 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2670 functions that describe the relation between the elements accessed
2671 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2672 is verified:
2673
2674 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2675
2676 static void
2677 analyze_miv_subscript (tree chrec_a,
2678 tree chrec_b,
2679 conflict_function **overlaps_a,
2680 conflict_function **overlaps_b,
2681 tree *last_conflicts,
2682 struct loop *loop_nest)
2683 {
2684 /* FIXME: This is a MIV subscript, not yet handled.
2685 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2686 (A[i] vs. A[j]).
2687
2688 In the SIV test we had to solve a Diophantine equation with two
2689 variables. In the MIV case we have to solve a Diophantine
2690 equation with 2*n variables (if the subscript uses n IVs).
2691 */
2692 tree type, difference;
2693
2694 dependence_stats.num_miv++;
2695 if (dump_file && (dump_flags & TDF_DETAILS))
2696 fprintf (dump_file, "(analyze_miv_subscript \n");
2697
2698 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2699 chrec_a = chrec_convert (type, chrec_a, NULL);
2700 chrec_b = chrec_convert (type, chrec_b, NULL);
2701 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2702
2703 if (eq_evolutions_p (chrec_a, chrec_b))
2704 {
2705 /* Access functions are the same: all the elements are accessed
2706 in the same order. */
2707 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2708 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2709 *last_conflicts = estimated_loop_iterations_tree
2710 (get_chrec_loop (chrec_a), true);
2711 dependence_stats.num_miv_dependent++;
2712 }
2713
2714 else if (evolution_function_is_constant_p (difference)
2715 /* For the moment, the following is verified:
2716 evolution_function_is_affine_multivariate_p (chrec_a,
2717 loop_nest->num) */
2718 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2719 {
2720 /* testsuite/.../ssa-chrec-33.c
2721 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2722
2723 The difference is 1, and all the evolution steps are multiples
2724 of 2, consequently there are no overlapping elements. */
2725 *overlaps_a = conflict_fn_no_dependence ();
2726 *overlaps_b = conflict_fn_no_dependence ();
2727 *last_conflicts = integer_zero_node;
2728 dependence_stats.num_miv_independent++;
2729 }
2730
2731 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2732 && !chrec_contains_symbols (chrec_a)
2733 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2734 && !chrec_contains_symbols (chrec_b))
2735 {
2736 /* testsuite/.../ssa-chrec-35.c
2737 {0, +, 1}_2 vs. {0, +, 1}_3
2738 the overlapping elements are respectively located at iterations:
2739 {0, +, 1}_x and {0, +, 1}_x,
2740 in other words, we have the equality:
2741 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2742
2743 Other examples:
2744 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2745 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2746
2747 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2748 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2749 */
2750 analyze_subscript_affine_affine (chrec_a, chrec_b,
2751 overlaps_a, overlaps_b, last_conflicts);
2752
2753 if (CF_NOT_KNOWN_P (*overlaps_a)
2754 || CF_NOT_KNOWN_P (*overlaps_b))
2755 dependence_stats.num_miv_unimplemented++;
2756 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2757 || CF_NO_DEPENDENCE_P (*overlaps_b))
2758 dependence_stats.num_miv_independent++;
2759 else
2760 dependence_stats.num_miv_dependent++;
2761 }
2762
2763 else
2764 {
2765 /* When the analysis is too difficult, answer "don't know". */
2766 if (dump_file && (dump_flags & TDF_DETAILS))
2767 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2768
2769 *overlaps_a = conflict_fn_not_known ();
2770 *overlaps_b = conflict_fn_not_known ();
2771 *last_conflicts = chrec_dont_know;
2772 dependence_stats.num_miv_unimplemented++;
2773 }
2774
2775 if (dump_file && (dump_flags & TDF_DETAILS))
2776 fprintf (dump_file, ")\n");
2777 }
2778
2779 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2780 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2781 OVERLAP_ITERATIONS_B are initialized with two functions that
2782 describe the iterations that contain conflicting elements.
2783
2784 Remark: For an integer k >= 0, the following equality is true:
2785
2786 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2787 */
2788
2789 static void
2790 analyze_overlapping_iterations (tree chrec_a,
2791 tree chrec_b,
2792 conflict_function **overlap_iterations_a,
2793 conflict_function **overlap_iterations_b,
2794 tree *last_conflicts, struct loop *loop_nest)
2795 {
2796 unsigned int lnn = loop_nest->num;
2797
2798 dependence_stats.num_subscript_tests++;
2799
2800 if (dump_file && (dump_flags & TDF_DETAILS))
2801 {
2802 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2803 fprintf (dump_file, " (chrec_a = ");
2804 print_generic_expr (dump_file, chrec_a, 0);
2805 fprintf (dump_file, ")\n (chrec_b = ");
2806 print_generic_expr (dump_file, chrec_b, 0);
2807 fprintf (dump_file, ")\n");
2808 }
2809
2810 if (chrec_a == NULL_TREE
2811 || chrec_b == NULL_TREE
2812 || chrec_contains_undetermined (chrec_a)
2813 || chrec_contains_undetermined (chrec_b))
2814 {
2815 dependence_stats.num_subscript_undetermined++;
2816
2817 *overlap_iterations_a = conflict_fn_not_known ();
2818 *overlap_iterations_b = conflict_fn_not_known ();
2819 }
2820
2821 /* If they are the same chrec, and are affine, they overlap
2822 on every iteration. */
2823 else if (eq_evolutions_p (chrec_a, chrec_b)
2824 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2825 || operand_equal_p (chrec_a, chrec_b, 0)))
2826 {
2827 dependence_stats.num_same_subscript_function++;
2828 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2829 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2830 *last_conflicts = chrec_dont_know;
2831 }
2832
2833 /* If they aren't the same, and aren't affine, we can't do anything
2834 yet. */
2835 else if ((chrec_contains_symbols (chrec_a)
2836 || chrec_contains_symbols (chrec_b))
2837 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2838 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2839 {
2840 dependence_stats.num_subscript_undetermined++;
2841 *overlap_iterations_a = conflict_fn_not_known ();
2842 *overlap_iterations_b = conflict_fn_not_known ();
2843 }
2844
2845 else if (ziv_subscript_p (chrec_a, chrec_b))
2846 analyze_ziv_subscript (chrec_a, chrec_b,
2847 overlap_iterations_a, overlap_iterations_b,
2848 last_conflicts);
2849
2850 else if (siv_subscript_p (chrec_a, chrec_b))
2851 analyze_siv_subscript (chrec_a, chrec_b,
2852 overlap_iterations_a, overlap_iterations_b,
2853 last_conflicts, lnn);
2854
2855 else
2856 analyze_miv_subscript (chrec_a, chrec_b,
2857 overlap_iterations_a, overlap_iterations_b,
2858 last_conflicts, loop_nest);
2859
2860 if (dump_file && (dump_flags & TDF_DETAILS))
2861 {
2862 fprintf (dump_file, " (overlap_iterations_a = ");
2863 dump_conflict_function (dump_file, *overlap_iterations_a);
2864 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2865 dump_conflict_function (dump_file, *overlap_iterations_b);
2866 fprintf (dump_file, ")\n");
2867 fprintf (dump_file, ")\n");
2868 }
2869 }
2870
2871 /* Helper function for uniquely inserting distance vectors. */
2872
2873 static void
2874 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2875 {
2876 unsigned i;
2877 lambda_vector v;
2878
2879 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2880 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2881 return;
2882
2883 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2884 }
2885
2886 /* Helper function for uniquely inserting direction vectors. */
2887
2888 static void
2889 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2890 {
2891 unsigned i;
2892 lambda_vector v;
2893
2894 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2895 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2896 return;
2897
2898 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2899 }
2900
2901 /* Add a distance of 1 on all the loops outer than INDEX. If we
2902 haven't yet determined a distance for this outer loop, push a new
2903 distance vector composed of the previous distance, and a distance
2904 of 1 for this outer loop. Example:
2905
2906 | loop_1
2907 | loop_2
2908 | A[10]
2909 | endloop_2
2910 | endloop_1
2911
2912 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2913 save (0, 1), then we have to save (1, 0). */
2914
2915 static void
2916 add_outer_distances (struct data_dependence_relation *ddr,
2917 lambda_vector dist_v, int index)
2918 {
2919 /* For each outer loop where init_v is not set, the accesses are
2920 in dependence of distance 1 in the loop. */
2921 while (--index >= 0)
2922 {
2923 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2924 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2925 save_v[index] = 1;
2926 save_dist_v (ddr, save_v);
2927 }
2928 }
2929
2930 /* Return false when fail to represent the data dependence as a
2931 distance vector. INIT_B is set to true when a component has been
2932 added to the distance vector DIST_V. INDEX_CARRY is then set to
2933 the index in DIST_V that carries the dependence. */
2934
2935 static bool
2936 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2937 struct data_reference *ddr_a,
2938 struct data_reference *ddr_b,
2939 lambda_vector dist_v, bool *init_b,
2940 int *index_carry)
2941 {
2942 unsigned i;
2943 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2944
2945 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2946 {
2947 tree access_fn_a, access_fn_b;
2948 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2949
2950 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2951 {
2952 non_affine_dependence_relation (ddr);
2953 return false;
2954 }
2955
2956 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2957 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2958
2959 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2960 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2961 {
2962 int dist, index;
2963 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2964 DDR_LOOP_NEST (ddr));
2965 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2966 DDR_LOOP_NEST (ddr));
2967
2968 /* The dependence is carried by the outermost loop. Example:
2969 | loop_1
2970 | A[{4, +, 1}_1]
2971 | loop_2
2972 | A[{5, +, 1}_2]
2973 | endloop_2
2974 | endloop_1
2975 In this case, the dependence is carried by loop_1. */
2976 index = index_a < index_b ? index_a : index_b;
2977 *index_carry = MIN (index, *index_carry);
2978
2979 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2980 {
2981 non_affine_dependence_relation (ddr);
2982 return false;
2983 }
2984
2985 dist = int_cst_value (SUB_DISTANCE (subscript));
2986
2987 /* This is the subscript coupling test. If we have already
2988 recorded a distance for this loop (a distance coming from
2989 another subscript), it should be the same. For example,
2990 in the following code, there is no dependence:
2991
2992 | loop i = 0, N, 1
2993 | T[i+1][i] = ...
2994 | ... = T[i][i]
2995 | endloop
2996 */
2997 if (init_v[index] != 0 && dist_v[index] != dist)
2998 {
2999 finalize_ddr_dependent (ddr, chrec_known);
3000 return false;
3001 }
3002
3003 dist_v[index] = dist;
3004 init_v[index] = 1;
3005 *init_b = true;
3006 }
3007 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3008 {
3009 /* This can be for example an affine vs. constant dependence
3010 (T[i] vs. T[3]) that is not an affine dependence and is
3011 not representable as a distance vector. */
3012 non_affine_dependence_relation (ddr);
3013 return false;
3014 }
3015 }
3016
3017 return true;
3018 }
3019
3020 /* Return true when the DDR contains only constant access functions. */
3021
3022 static bool
3023 constant_access_functions (const struct data_dependence_relation *ddr)
3024 {
3025 unsigned i;
3026
3027 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3028 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3029 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3030 return false;
3031
3032 return true;
3033 }
3034
3035 /* Helper function for the case where DDR_A and DDR_B are the same
3036 multivariate access function with a constant step. For an example
3037 see pr34635-1.c. */
3038
3039 static void
3040 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3041 {
3042 int x_1, x_2;
3043 tree c_1 = CHREC_LEFT (c_2);
3044 tree c_0 = CHREC_LEFT (c_1);
3045 lambda_vector dist_v;
3046 int v1, v2, cd;
3047
3048 /* Polynomials with more than 2 variables are not handled yet. When
3049 the evolution steps are parameters, it is not possible to
3050 represent the dependence using classical distance vectors. */
3051 if (TREE_CODE (c_0) != INTEGER_CST
3052 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3053 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3054 {
3055 DDR_AFFINE_P (ddr) = false;
3056 return;
3057 }
3058
3059 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3060 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3061
3062 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3063 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3064 v1 = int_cst_value (CHREC_RIGHT (c_1));
3065 v2 = int_cst_value (CHREC_RIGHT (c_2));
3066 cd = gcd (v1, v2);
3067 v1 /= cd;
3068 v2 /= cd;
3069
3070 if (v2 < 0)
3071 {
3072 v2 = -v2;
3073 v1 = -v1;
3074 }
3075
3076 dist_v[x_1] = v2;
3077 dist_v[x_2] = -v1;
3078 save_dist_v (ddr, dist_v);
3079
3080 add_outer_distances (ddr, dist_v, x_1);
3081 }
3082
3083 /* Helper function for the case where DDR_A and DDR_B are the same
3084 access functions. */
3085
3086 static void
3087 add_other_self_distances (struct data_dependence_relation *ddr)
3088 {
3089 lambda_vector dist_v;
3090 unsigned i;
3091 int index_carry = DDR_NB_LOOPS (ddr);
3092
3093 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3094 {
3095 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3096
3097 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3098 {
3099 if (!evolution_function_is_univariate_p (access_fun))
3100 {
3101 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3102 {
3103 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3104 return;
3105 }
3106
3107 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3108
3109 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3110 add_multivariate_self_dist (ddr, access_fun);
3111 else
3112 /* The evolution step is not constant: it varies in
3113 the outer loop, so this cannot be represented by a
3114 distance vector. For example in pr34635.c the
3115 evolution is {0, +, {0, +, 4}_1}_2. */
3116 DDR_AFFINE_P (ddr) = false;
3117
3118 return;
3119 }
3120
3121 index_carry = MIN (index_carry,
3122 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3123 DDR_LOOP_NEST (ddr)));
3124 }
3125 }
3126
3127 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3128 add_outer_distances (ddr, dist_v, index_carry);
3129 }
3130
3131 static void
3132 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3133 {
3134 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3135
3136 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3137 save_dist_v (ddr, dist_v);
3138 }
3139
3140 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3141 is the case for example when access functions are the same and
3142 equal to a constant, as in:
3143
3144 | loop_1
3145 | A[3] = ...
3146 | ... = A[3]
3147 | endloop_1
3148
3149 in which case the distance vectors are (0) and (1). */
3150
3151 static void
3152 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3153 {
3154 unsigned i, j;
3155
3156 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3157 {
3158 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3159 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3160 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3161
3162 for (j = 0; j < ca->n; j++)
3163 if (affine_function_zero_p (ca->fns[j]))
3164 {
3165 insert_innermost_unit_dist_vector (ddr);
3166 return;
3167 }
3168
3169 for (j = 0; j < cb->n; j++)
3170 if (affine_function_zero_p (cb->fns[j]))
3171 {
3172 insert_innermost_unit_dist_vector (ddr);
3173 return;
3174 }
3175 }
3176 }
3177
3178 /* Compute the classic per loop distance vector. DDR is the data
3179 dependence relation to build a vector from. Return false when fail
3180 to represent the data dependence as a distance vector. */
3181
3182 static bool
3183 build_classic_dist_vector (struct data_dependence_relation *ddr,
3184 struct loop *loop_nest)
3185 {
3186 bool init_b = false;
3187 int index_carry = DDR_NB_LOOPS (ddr);
3188 lambda_vector dist_v;
3189
3190 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3191 return false;
3192
3193 if (same_access_functions (ddr))
3194 {
3195 /* Save the 0 vector. */
3196 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3197 save_dist_v (ddr, dist_v);
3198
3199 if (constant_access_functions (ddr))
3200 add_distance_for_zero_overlaps (ddr);
3201
3202 if (DDR_NB_LOOPS (ddr) > 1)
3203 add_other_self_distances (ddr);
3204
3205 return true;
3206 }
3207
3208 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3209 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3210 dist_v, &init_b, &index_carry))
3211 return false;
3212
3213 /* Save the distance vector if we initialized one. */
3214 if (init_b)
3215 {
3216 /* Verify a basic constraint: classic distance vectors should
3217 always be lexicographically positive.
3218
3219 Data references are collected in the order of execution of
3220 the program, thus for the following loop
3221
3222 | for (i = 1; i < 100; i++)
3223 | for (j = 1; j < 100; j++)
3224 | {
3225 | t = T[j+1][i-1]; // A
3226 | T[j][i] = t + 2; // B
3227 | }
3228
3229 references are collected following the direction of the wind:
3230 A then B. The data dependence tests are performed also
3231 following this order, such that we're looking at the distance
3232 separating the elements accessed by A from the elements later
3233 accessed by B. But in this example, the distance returned by
3234 test_dep (A, B) is lexicographically negative (-1, 1), that
3235 means that the access A occurs later than B with respect to
3236 the outer loop, ie. we're actually looking upwind. In this
3237 case we solve test_dep (B, A) looking downwind to the
3238 lexicographically positive solution, that returns the
3239 distance vector (1, -1). */
3240 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3241 {
3242 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3243 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3244 loop_nest))
3245 return false;
3246 compute_subscript_distance (ddr);
3247 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3248 save_v, &init_b, &index_carry))
3249 return false;
3250 save_dist_v (ddr, save_v);
3251 DDR_REVERSED_P (ddr) = true;
3252
3253 /* In this case there is a dependence forward for all the
3254 outer loops:
3255
3256 | for (k = 1; k < 100; k++)
3257 | for (i = 1; i < 100; i++)
3258 | for (j = 1; j < 100; j++)
3259 | {
3260 | t = T[j+1][i-1]; // A
3261 | T[j][i] = t + 2; // B
3262 | }
3263
3264 the vectors are:
3265 (0, 1, -1)
3266 (1, 1, -1)
3267 (1, -1, 1)
3268 */
3269 if (DDR_NB_LOOPS (ddr) > 1)
3270 {
3271 add_outer_distances (ddr, save_v, index_carry);
3272 add_outer_distances (ddr, dist_v, index_carry);
3273 }
3274 }
3275 else
3276 {
3277 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3278 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3279
3280 if (DDR_NB_LOOPS (ddr) > 1)
3281 {
3282 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3283
3284 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3285 DDR_A (ddr), loop_nest))
3286 return false;
3287 compute_subscript_distance (ddr);
3288 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3289 opposite_v, &init_b,
3290 &index_carry))
3291 return false;
3292
3293 save_dist_v (ddr, save_v);
3294 add_outer_distances (ddr, dist_v, index_carry);
3295 add_outer_distances (ddr, opposite_v, index_carry);
3296 }
3297 else
3298 save_dist_v (ddr, save_v);
3299 }
3300 }
3301 else
3302 {
3303 /* There is a distance of 1 on all the outer loops: Example:
3304 there is a dependence of distance 1 on loop_1 for the array A.
3305
3306 | loop_1
3307 | A[5] = ...
3308 | endloop
3309 */
3310 add_outer_distances (ddr, dist_v,
3311 lambda_vector_first_nz (dist_v,
3312 DDR_NB_LOOPS (ddr), 0));
3313 }
3314
3315 if (dump_file && (dump_flags & TDF_DETAILS))
3316 {
3317 unsigned i;
3318
3319 fprintf (dump_file, "(build_classic_dist_vector\n");
3320 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3321 {
3322 fprintf (dump_file, " dist_vector = (");
3323 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3324 DDR_NB_LOOPS (ddr));
3325 fprintf (dump_file, " )\n");
3326 }
3327 fprintf (dump_file, ")\n");
3328 }
3329
3330 return true;
3331 }
3332
3333 /* Return the direction for a given distance.
3334 FIXME: Computing dir this way is suboptimal, since dir can catch
3335 cases that dist is unable to represent. */
3336
3337 static inline enum data_dependence_direction
3338 dir_from_dist (int dist)
3339 {
3340 if (dist > 0)
3341 return dir_positive;
3342 else if (dist < 0)
3343 return dir_negative;
3344 else
3345 return dir_equal;
3346 }
3347
3348 /* Compute the classic per loop direction vector. DDR is the data
3349 dependence relation to build a vector from. */
3350
3351 static void
3352 build_classic_dir_vector (struct data_dependence_relation *ddr)
3353 {
3354 unsigned i, j;
3355 lambda_vector dist_v;
3356
3357 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3358 {
3359 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3360
3361 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3362 dir_v[j] = dir_from_dist (dist_v[j]);
3363
3364 save_dir_v (ddr, dir_v);
3365 }
3366 }
3367
3368 /* Helper function. Returns true when there is a dependence between
3369 data references DRA and DRB. */
3370
3371 static bool
3372 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3373 struct data_reference *dra,
3374 struct data_reference *drb,
3375 struct loop *loop_nest)
3376 {
3377 unsigned int i;
3378 tree last_conflicts;
3379 struct subscript *subscript;
3380
3381 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3382 i++)
3383 {
3384 conflict_function *overlaps_a, *overlaps_b;
3385
3386 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3387 DR_ACCESS_FN (drb, i),
3388 &overlaps_a, &overlaps_b,
3389 &last_conflicts, loop_nest);
3390
3391 if (CF_NOT_KNOWN_P (overlaps_a)
3392 || CF_NOT_KNOWN_P (overlaps_b))
3393 {
3394 finalize_ddr_dependent (ddr, chrec_dont_know);
3395 dependence_stats.num_dependence_undetermined++;
3396 free_conflict_function (overlaps_a);
3397 free_conflict_function (overlaps_b);
3398 return false;
3399 }
3400
3401 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3402 || CF_NO_DEPENDENCE_P (overlaps_b))
3403 {
3404 finalize_ddr_dependent (ddr, chrec_known);
3405 dependence_stats.num_dependence_independent++;
3406 free_conflict_function (overlaps_a);
3407 free_conflict_function (overlaps_b);
3408 return false;
3409 }
3410
3411 else
3412 {
3413 if (SUB_CONFLICTS_IN_A (subscript))
3414 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3415 if (SUB_CONFLICTS_IN_B (subscript))
3416 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3417
3418 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3419 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3420 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3421 }
3422 }
3423
3424 return true;
3425 }
3426
3427 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3428
3429 static void
3430 subscript_dependence_tester (struct data_dependence_relation *ddr,
3431 struct loop *loop_nest)
3432 {
3433
3434 if (dump_file && (dump_flags & TDF_DETAILS))
3435 fprintf (dump_file, "(subscript_dependence_tester \n");
3436
3437 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3438 dependence_stats.num_dependence_dependent++;
3439
3440 compute_subscript_distance (ddr);
3441 if (build_classic_dist_vector (ddr, loop_nest))
3442 build_classic_dir_vector (ddr);
3443
3444 if (dump_file && (dump_flags & TDF_DETAILS))
3445 fprintf (dump_file, ")\n");
3446 }
3447
3448 /* Returns true when all the access functions of A are affine or
3449 constant with respect to LOOP_NEST. */
3450
3451 static bool
3452 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3453 const struct loop *loop_nest)
3454 {
3455 unsigned int i;
3456 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3457 tree t;
3458
3459 FOR_EACH_VEC_ELT (tree, fns, i, t)
3460 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3461 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3462 return false;
3463
3464 return true;
3465 }
3466
3467 /* Initializes an equation for an OMEGA problem using the information
3468 contained in the ACCESS_FUN. Returns true when the operation
3469 succeeded.
3470
3471 PB is the omega constraint system.
3472 EQ is the number of the equation to be initialized.
3473 OFFSET is used for shifting the variables names in the constraints:
3474 a constrain is composed of 2 * the number of variables surrounding
3475 dependence accesses. OFFSET is set either to 0 for the first n variables,
3476 then it is set to n.
3477 ACCESS_FUN is expected to be an affine chrec. */
3478
3479 static bool
3480 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3481 unsigned int offset, tree access_fun,
3482 struct data_dependence_relation *ddr)
3483 {
3484 switch (TREE_CODE (access_fun))
3485 {
3486 case POLYNOMIAL_CHREC:
3487 {
3488 tree left = CHREC_LEFT (access_fun);
3489 tree right = CHREC_RIGHT (access_fun);
3490 int var = CHREC_VARIABLE (access_fun);
3491 unsigned var_idx;
3492
3493 if (TREE_CODE (right) != INTEGER_CST)
3494 return false;
3495
3496 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3497 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3498
3499 /* Compute the innermost loop index. */
3500 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3501
3502 if (offset == 0)
3503 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3504 += int_cst_value (right);
3505
3506 switch (TREE_CODE (left))
3507 {
3508 case POLYNOMIAL_CHREC:
3509 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3510
3511 case INTEGER_CST:
3512 pb->eqs[eq].coef[0] += int_cst_value (left);
3513 return true;
3514
3515 default:
3516 return false;
3517 }
3518 }
3519
3520 case INTEGER_CST:
3521 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3522 return true;
3523
3524 default:
3525 return false;
3526 }
3527 }
3528
3529 /* As explained in the comments preceding init_omega_for_ddr, we have
3530 to set up a system for each loop level, setting outer loops
3531 variation to zero, and current loop variation to positive or zero.
3532 Save each lexico positive distance vector. */
3533
3534 static void
3535 omega_extract_distance_vectors (omega_pb pb,
3536 struct data_dependence_relation *ddr)
3537 {
3538 int eq, geq;
3539 unsigned i, j;
3540 struct loop *loopi, *loopj;
3541 enum omega_result res;
3542
3543 /* Set a new problem for each loop in the nest. The basis is the
3544 problem that we have initialized until now. On top of this we
3545 add new constraints. */
3546 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3547 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3548 {
3549 int dist = 0;
3550 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3551 DDR_NB_LOOPS (ddr));
3552
3553 omega_copy_problem (copy, pb);
3554
3555 /* For all the outer loops "loop_j", add "dj = 0". */
3556 for (j = 0;
3557 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3558 {
3559 eq = omega_add_zero_eq (copy, omega_black);
3560 copy->eqs[eq].coef[j + 1] = 1;
3561 }
3562
3563 /* For "loop_i", add "0 <= di". */
3564 geq = omega_add_zero_geq (copy, omega_black);
3565 copy->geqs[geq].coef[i + 1] = 1;
3566
3567 /* Reduce the constraint system, and test that the current
3568 problem is feasible. */
3569 res = omega_simplify_problem (copy);
3570 if (res == omega_false
3571 || res == omega_unknown
3572 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3573 goto next_problem;
3574
3575 for (eq = 0; eq < copy->num_subs; eq++)
3576 if (copy->subs[eq].key == (int) i + 1)
3577 {
3578 dist = copy->subs[eq].coef[0];
3579 goto found_dist;
3580 }
3581
3582 if (dist == 0)
3583 {
3584 /* Reinitialize problem... */
3585 omega_copy_problem (copy, pb);
3586 for (j = 0;
3587 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3588 {
3589 eq = omega_add_zero_eq (copy, omega_black);
3590 copy->eqs[eq].coef[j + 1] = 1;
3591 }
3592
3593 /* ..., but this time "di = 1". */
3594 eq = omega_add_zero_eq (copy, omega_black);
3595 copy->eqs[eq].coef[i + 1] = 1;
3596 copy->eqs[eq].coef[0] = -1;
3597
3598 res = omega_simplify_problem (copy);
3599 if (res == omega_false
3600 || res == omega_unknown
3601 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3602 goto next_problem;
3603
3604 for (eq = 0; eq < copy->num_subs; eq++)
3605 if (copy->subs[eq].key == (int) i + 1)
3606 {
3607 dist = copy->subs[eq].coef[0];
3608 goto found_dist;
3609 }
3610 }
3611
3612 found_dist:;
3613 /* Save the lexicographically positive distance vector. */
3614 if (dist >= 0)
3615 {
3616 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3617 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3618
3619 dist_v[i] = dist;
3620
3621 for (eq = 0; eq < copy->num_subs; eq++)
3622 if (copy->subs[eq].key > 0)
3623 {
3624 dist = copy->subs[eq].coef[0];
3625 dist_v[copy->subs[eq].key - 1] = dist;
3626 }
3627
3628 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3629 dir_v[j] = dir_from_dist (dist_v[j]);
3630
3631 save_dist_v (ddr, dist_v);
3632 save_dir_v (ddr, dir_v);
3633 }
3634
3635 next_problem:;
3636 omega_free_problem (copy);
3637 }
3638 }
3639
3640 /* This is called for each subscript of a tuple of data references:
3641 insert an equality for representing the conflicts. */
3642
3643 static bool
3644 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3645 struct data_dependence_relation *ddr,
3646 omega_pb pb, bool *maybe_dependent)
3647 {
3648 int eq;
3649 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3650 TREE_TYPE (access_fun_b));
3651 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3652 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3653 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3654 tree minus_one;
3655
3656 /* When the fun_a - fun_b is not constant, the dependence is not
3657 captured by the classic distance vector representation. */
3658 if (TREE_CODE (difference) != INTEGER_CST)
3659 return false;
3660
3661 /* ZIV test. */
3662 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3663 {
3664 /* There is no dependence. */
3665 *maybe_dependent = false;
3666 return true;
3667 }
3668
3669 minus_one = build_int_cst (type, -1);
3670 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3671
3672 eq = omega_add_zero_eq (pb, omega_black);
3673 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3674 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3675 /* There is probably a dependence, but the system of
3676 constraints cannot be built: answer "don't know". */
3677 return false;
3678
3679 /* GCD test. */
3680 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3681 && !int_divides_p (lambda_vector_gcd
3682 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3683 2 * DDR_NB_LOOPS (ddr)),
3684 pb->eqs[eq].coef[0]))
3685 {
3686 /* There is no dependence. */
3687 *maybe_dependent = false;
3688 return true;
3689 }
3690
3691 return true;
3692 }
3693
3694 /* Helper function, same as init_omega_for_ddr but specialized for
3695 data references A and B. */
3696
3697 static bool
3698 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3699 struct data_dependence_relation *ddr,
3700 omega_pb pb, bool *maybe_dependent)
3701 {
3702 unsigned i;
3703 int ineq;
3704 struct loop *loopi;
3705 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3706
3707 /* Insert an equality per subscript. */
3708 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3709 {
3710 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3711 ddr, pb, maybe_dependent))
3712 return false;
3713 else if (*maybe_dependent == false)
3714 {
3715 /* There is no dependence. */
3716 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3717 return true;
3718 }
3719 }
3720
3721 /* Insert inequalities: constraints corresponding to the iteration
3722 domain, i.e. the loops surrounding the references "loop_x" and
3723 the distance variables "dx". The layout of the OMEGA
3724 representation is as follows:
3725 - coef[0] is the constant
3726 - coef[1..nb_loops] are the protected variables that will not be
3727 removed by the solver: the "dx"
3728 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3729 */
3730 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3731 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3732 {
3733 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3734
3735 /* 0 <= loop_x */
3736 ineq = omega_add_zero_geq (pb, omega_black);
3737 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3738
3739 /* 0 <= loop_x + dx */
3740 ineq = omega_add_zero_geq (pb, omega_black);
3741 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3742 pb->geqs[ineq].coef[i + 1] = 1;
3743
3744 if (nbi != -1)
3745 {
3746 /* loop_x <= nb_iters */
3747 ineq = omega_add_zero_geq (pb, omega_black);
3748 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3749 pb->geqs[ineq].coef[0] = nbi;
3750
3751 /* loop_x + dx <= nb_iters */
3752 ineq = omega_add_zero_geq (pb, omega_black);
3753 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3754 pb->geqs[ineq].coef[i + 1] = -1;
3755 pb->geqs[ineq].coef[0] = nbi;
3756
3757 /* A step "dx" bigger than nb_iters is not feasible, so
3758 add "0 <= nb_iters + dx", */
3759 ineq = omega_add_zero_geq (pb, omega_black);
3760 pb->geqs[ineq].coef[i + 1] = 1;
3761 pb->geqs[ineq].coef[0] = nbi;
3762 /* and "dx <= nb_iters". */
3763 ineq = omega_add_zero_geq (pb, omega_black);
3764 pb->geqs[ineq].coef[i + 1] = -1;
3765 pb->geqs[ineq].coef[0] = nbi;
3766 }
3767 }
3768
3769 omega_extract_distance_vectors (pb, ddr);
3770
3771 return true;
3772 }
3773
3774 /* Sets up the Omega dependence problem for the data dependence
3775 relation DDR. Returns false when the constraint system cannot be
3776 built, ie. when the test answers "don't know". Returns true
3777 otherwise, and when independence has been proved (using one of the
3778 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3779 set MAYBE_DEPENDENT to true.
3780
3781 Example: for setting up the dependence system corresponding to the
3782 conflicting accesses
3783
3784 | loop_i
3785 | loop_j
3786 | A[i, i+1] = ...
3787 | ... A[2*j, 2*(i + j)]
3788 | endloop_j
3789 | endloop_i
3790
3791 the following constraints come from the iteration domain:
3792
3793 0 <= i <= Ni
3794 0 <= i + di <= Ni
3795 0 <= j <= Nj
3796 0 <= j + dj <= Nj
3797
3798 where di, dj are the distance variables. The constraints
3799 representing the conflicting elements are:
3800
3801 i = 2 * (j + dj)
3802 i + 1 = 2 * (i + di + j + dj)
3803
3804 For asking that the resulting distance vector (di, dj) be
3805 lexicographically positive, we insert the constraint "di >= 0". If
3806 "di = 0" in the solution, we fix that component to zero, and we
3807 look at the inner loops: we set a new problem where all the outer
3808 loop distances are zero, and fix this inner component to be
3809 positive. When one of the components is positive, we save that
3810 distance, and set a new problem where the distance on this loop is
3811 zero, searching for other distances in the inner loops. Here is
3812 the classic example that illustrates that we have to set for each
3813 inner loop a new problem:
3814
3815 | loop_1
3816 | loop_2
3817 | A[10]
3818 | endloop_2
3819 | endloop_1
3820
3821 we have to save two distances (1, 0) and (0, 1).
3822
3823 Given two array references, refA and refB, we have to set the
3824 dependence problem twice, refA vs. refB and refB vs. refA, and we
3825 cannot do a single test, as refB might occur before refA in the
3826 inner loops, and the contrary when considering outer loops: ex.
3827
3828 | loop_0
3829 | loop_1
3830 | loop_2
3831 | T[{1,+,1}_2][{1,+,1}_1] // refA
3832 | T[{2,+,1}_2][{0,+,1}_1] // refB
3833 | endloop_2
3834 | endloop_1
3835 | endloop_0
3836
3837 refB touches the elements in T before refA, and thus for the same
3838 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3839 but for successive loop_0 iterations, we have (1, -1, 1)
3840
3841 The Omega solver expects the distance variables ("di" in the
3842 previous example) to come first in the constraint system (as
3843 variables to be protected, or "safe" variables), the constraint
3844 system is built using the following layout:
3845
3846 "cst | distance vars | index vars".
3847 */
3848
3849 static bool
3850 init_omega_for_ddr (struct data_dependence_relation *ddr,
3851 bool *maybe_dependent)
3852 {
3853 omega_pb pb;
3854 bool res = false;
3855
3856 *maybe_dependent = true;
3857
3858 if (same_access_functions (ddr))
3859 {
3860 unsigned j;
3861 lambda_vector dir_v;
3862
3863 /* Save the 0 vector. */
3864 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3865 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3866 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3867 dir_v[j] = dir_equal;
3868 save_dir_v (ddr, dir_v);
3869
3870 /* Save the dependences carried by outer loops. */
3871 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3872 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3873 maybe_dependent);
3874 omega_free_problem (pb);
3875 return res;
3876 }
3877
3878 /* Omega expects the protected variables (those that have to be kept
3879 after elimination) to appear first in the constraint system.
3880 These variables are the distance variables. In the following
3881 initialization we declare NB_LOOPS safe variables, and the total
3882 number of variables for the constraint system is 2*NB_LOOPS. */
3883 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3884 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3885 maybe_dependent);
3886 omega_free_problem (pb);
3887
3888 /* Stop computation if not decidable, or no dependence. */
3889 if (res == false || *maybe_dependent == false)
3890 return res;
3891
3892 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3893 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3894 maybe_dependent);
3895 omega_free_problem (pb);
3896
3897 return res;
3898 }
3899
3900 /* Return true when DDR contains the same information as that stored
3901 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3902
3903 static bool
3904 ddr_consistent_p (FILE *file,
3905 struct data_dependence_relation *ddr,
3906 VEC (lambda_vector, heap) *dist_vects,
3907 VEC (lambda_vector, heap) *dir_vects)
3908 {
3909 unsigned int i, j;
3910
3911 /* If dump_file is set, output there. */
3912 if (dump_file && (dump_flags & TDF_DETAILS))
3913 file = dump_file;
3914
3915 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3916 {
3917 lambda_vector b_dist_v;
3918 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3919 VEC_length (lambda_vector, dist_vects),
3920 DDR_NUM_DIST_VECTS (ddr));
3921
3922 fprintf (file, "Banerjee dist vectors:\n");
3923 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3924 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3925
3926 fprintf (file, "Omega dist vectors:\n");
3927 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3928 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3929
3930 fprintf (file, "data dependence relation:\n");
3931 dump_data_dependence_relation (file, ddr);
3932
3933 fprintf (file, ")\n");
3934 return false;
3935 }
3936
3937 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3938 {
3939 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3940 VEC_length (lambda_vector, dir_vects),
3941 DDR_NUM_DIR_VECTS (ddr));
3942 return false;
3943 }
3944
3945 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3946 {
3947 lambda_vector a_dist_v;
3948 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3949
3950 /* Distance vectors are not ordered in the same way in the DDR
3951 and in the DIST_VECTS: search for a matching vector. */
3952 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3953 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3954 break;
3955
3956 if (j == VEC_length (lambda_vector, dist_vects))
3957 {
3958 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3959 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3960 fprintf (file, "not found in Omega dist vectors:\n");
3961 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3962 fprintf (file, "data dependence relation:\n");
3963 dump_data_dependence_relation (file, ddr);
3964 fprintf (file, ")\n");
3965 }
3966 }
3967
3968 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3969 {
3970 lambda_vector a_dir_v;
3971 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3972
3973 /* Direction vectors are not ordered in the same way in the DDR
3974 and in the DIR_VECTS: search for a matching vector. */
3975 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3976 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3977 break;
3978
3979 if (j == VEC_length (lambda_vector, dist_vects))
3980 {
3981 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3982 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3983 fprintf (file, "not found in Omega dir vectors:\n");
3984 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3985 fprintf (file, "data dependence relation:\n");
3986 dump_data_dependence_relation (file, ddr);
3987 fprintf (file, ")\n");
3988 }
3989 }
3990
3991 return true;
3992 }
3993
3994 /* This computes the affine dependence relation between A and B with
3995 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3996 independence between two accesses, while CHREC_DONT_KNOW is used
3997 for representing the unknown relation.
3998
3999 Note that it is possible to stop the computation of the dependence
4000 relation the first time we detect a CHREC_KNOWN element for a given
4001 subscript. */
4002
4003 static void
4004 compute_affine_dependence (struct data_dependence_relation *ddr,
4005 struct loop *loop_nest)
4006 {
4007 struct data_reference *dra = DDR_A (ddr);
4008 struct data_reference *drb = DDR_B (ddr);
4009
4010 if (dump_file && (dump_flags & TDF_DETAILS))
4011 {
4012 fprintf (dump_file, "(compute_affine_dependence\n");
4013 fprintf (dump_file, " (stmt_a = \n");
4014 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
4015 fprintf (dump_file, ")\n (stmt_b = \n");
4016 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
4017 fprintf (dump_file, ")\n");
4018 }
4019
4020 /* Analyze only when the dependence relation is not yet known. */
4021 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
4022 && !DDR_SELF_REFERENCE (ddr))
4023 {
4024 dependence_stats.num_dependence_tests++;
4025
4026 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4027 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4028 {
4029 if (flag_check_data_deps)
4030 {
4031 /* Compute the dependences using the first algorithm. */
4032 subscript_dependence_tester (ddr, loop_nest);
4033
4034 if (dump_file && (dump_flags & TDF_DETAILS))
4035 {
4036 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4037 dump_data_dependence_relation (dump_file, ddr);
4038 }
4039
4040 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4041 {
4042 bool maybe_dependent;
4043 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4044
4045 /* Save the result of the first DD analyzer. */
4046 dist_vects = DDR_DIST_VECTS (ddr);
4047 dir_vects = DDR_DIR_VECTS (ddr);
4048
4049 /* Reset the information. */
4050 DDR_DIST_VECTS (ddr) = NULL;
4051 DDR_DIR_VECTS (ddr) = NULL;
4052
4053 /* Compute the same information using Omega. */
4054 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4055 goto csys_dont_know;
4056
4057 if (dump_file && (dump_flags & TDF_DETAILS))
4058 {
4059 fprintf (dump_file, "Omega Analyzer\n");
4060 dump_data_dependence_relation (dump_file, ddr);
4061 }
4062
4063 /* Check that we get the same information. */
4064 if (maybe_dependent)
4065 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4066 dir_vects));
4067 }
4068 }
4069 else
4070 subscript_dependence_tester (ddr, loop_nest);
4071 }
4072
4073 /* As a last case, if the dependence cannot be determined, or if
4074 the dependence is considered too difficult to determine, answer
4075 "don't know". */
4076 else
4077 {
4078 csys_dont_know:;
4079 dependence_stats.num_dependence_undetermined++;
4080
4081 if (dump_file && (dump_flags & TDF_DETAILS))
4082 {
4083 fprintf (dump_file, "Data ref a:\n");
4084 dump_data_reference (dump_file, dra);
4085 fprintf (dump_file, "Data ref b:\n");
4086 dump_data_reference (dump_file, drb);
4087 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4088 }
4089 finalize_ddr_dependent (ddr, chrec_dont_know);
4090 }
4091 }
4092
4093 if (dump_file && (dump_flags & TDF_DETAILS))
4094 fprintf (dump_file, ")\n");
4095 }
4096
4097 /* This computes the dependence relation for the same data
4098 reference into DDR. */
4099
4100 static void
4101 compute_self_dependence (struct data_dependence_relation *ddr)
4102 {
4103 unsigned int i;
4104 struct subscript *subscript;
4105
4106 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4107 return;
4108
4109 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4110 i++)
4111 {
4112 if (SUB_CONFLICTS_IN_A (subscript))
4113 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4114 if (SUB_CONFLICTS_IN_B (subscript))
4115 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4116
4117 /* The accessed index overlaps for each iteration. */
4118 SUB_CONFLICTS_IN_A (subscript)
4119 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4120 SUB_CONFLICTS_IN_B (subscript)
4121 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4122 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4123 }
4124
4125 /* The distance vector is the zero vector. */
4126 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4127 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4128 }
4129
4130 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4131 the data references in DATAREFS, in the LOOP_NEST. When
4132 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4133 relations. */
4134
4135 void
4136 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4137 VEC (ddr_p, heap) **dependence_relations,
4138 VEC (loop_p, heap) *loop_nest,
4139 bool compute_self_and_rr)
4140 {
4141 struct data_dependence_relation *ddr;
4142 struct data_reference *a, *b;
4143 unsigned int i, j;
4144
4145 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4146 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4147 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4148 {
4149 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4150 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4151 if (loop_nest)
4152 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4153 }
4154
4155 if (compute_self_and_rr)
4156 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4157 {
4158 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4159 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4160 compute_self_dependence (ddr);
4161 }
4162 }
4163
4164 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4165 true if STMT clobbers memory, false otherwise. */
4166
4167 bool
4168 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4169 {
4170 bool clobbers_memory = false;
4171 data_ref_loc *ref;
4172 tree *op0, *op1;
4173 enum gimple_code stmt_code = gimple_code (stmt);
4174
4175 *references = NULL;
4176
4177 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4178 Calls have side-effects, except those to const or pure
4179 functions. */
4180 if ((stmt_code == GIMPLE_CALL
4181 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4182 || (stmt_code == GIMPLE_ASM
4183 && gimple_asm_volatile_p (stmt)))
4184 clobbers_memory = true;
4185
4186 if (!gimple_vuse (stmt))
4187 return clobbers_memory;
4188
4189 if (stmt_code == GIMPLE_ASSIGN)
4190 {
4191 tree base;
4192 op0 = gimple_assign_lhs_ptr (stmt);
4193 op1 = gimple_assign_rhs1_ptr (stmt);
4194
4195 if (DECL_P (*op1)
4196 || (REFERENCE_CLASS_P (*op1)
4197 && (base = get_base_address (*op1))
4198 && TREE_CODE (base) != SSA_NAME))
4199 {
4200 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4201 ref->pos = op1;
4202 ref->is_read = true;
4203 }
4204
4205 if (DECL_P (*op0)
4206 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4207 {
4208 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4209 ref->pos = op0;
4210 ref->is_read = false;
4211 }
4212 }
4213 else if (stmt_code == GIMPLE_CALL)
4214 {
4215 unsigned i, n = gimple_call_num_args (stmt);
4216
4217 for (i = 0; i < n; i++)
4218 {
4219 op0 = gimple_call_arg_ptr (stmt, i);
4220
4221 if (DECL_P (*op0)
4222 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4223 {
4224 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4225 ref->pos = op0;
4226 ref->is_read = true;
4227 }
4228 }
4229 }
4230
4231 return clobbers_memory;
4232 }
4233
4234 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4235 reference, returns false, otherwise returns true. NEST is the outermost
4236 loop of the loop nest in which the references should be analyzed. */
4237
4238 bool
4239 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4240 VEC (data_reference_p, heap) **datarefs)
4241 {
4242 unsigned i;
4243 VEC (data_ref_loc, heap) *references;
4244 data_ref_loc *ref;
4245 bool ret = true;
4246 data_reference_p dr;
4247
4248 if (get_references_in_stmt (stmt, &references))
4249 {
4250 VEC_free (data_ref_loc, heap, references);
4251 return false;
4252 }
4253
4254 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4255 {
4256 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4257 *ref->pos, stmt, ref->is_read);
4258 gcc_assert (dr != NULL);
4259
4260 /* FIXME -- data dependence analysis does not work correctly for objects
4261 with invariant addresses in loop nests. Let us fail here until the
4262 problem is fixed. */
4263 if (dr_address_invariant_p (dr) && nest)
4264 {
4265 free_data_ref (dr);
4266 if (dump_file && (dump_flags & TDF_DETAILS))
4267 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4268 ret = false;
4269 break;
4270 }
4271
4272 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4273 }
4274 VEC_free (data_ref_loc, heap, references);
4275 return ret;
4276 }
4277
4278 /* Stores the data references in STMT to DATAREFS. If there is an
4279 unanalyzable reference, returns false, otherwise returns true.
4280 NEST is the outermost loop of the loop nest in which the references
4281 should be instantiated, LOOP is the loop in which the references
4282 should be analyzed. */
4283
4284 bool
4285 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4286 VEC (data_reference_p, heap) **datarefs)
4287 {
4288 unsigned i;
4289 VEC (data_ref_loc, heap) *references;
4290 data_ref_loc *ref;
4291 bool ret = true;
4292 data_reference_p dr;
4293
4294 if (get_references_in_stmt (stmt, &references))
4295 {
4296 VEC_free (data_ref_loc, heap, references);
4297 return false;
4298 }
4299
4300 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4301 {
4302 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4303 gcc_assert (dr != NULL);
4304 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4305 }
4306
4307 VEC_free (data_ref_loc, heap, references);
4308 return ret;
4309 }
4310
4311 /* Search the data references in LOOP, and record the information into
4312 DATAREFS. Returns chrec_dont_know when failing to analyze a
4313 difficult case, returns NULL_TREE otherwise. */
4314
4315 static tree
4316 find_data_references_in_bb (struct loop *loop, basic_block bb,
4317 VEC (data_reference_p, heap) **datarefs)
4318 {
4319 gimple_stmt_iterator bsi;
4320
4321 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4322 {
4323 gimple stmt = gsi_stmt (bsi);
4324
4325 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4326 {
4327 struct data_reference *res;
4328 res = XCNEW (struct data_reference);
4329 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4330
4331 return chrec_dont_know;
4332 }
4333 }
4334
4335 return NULL_TREE;
4336 }
4337
4338 /* Search the data references in LOOP, and record the information into
4339 DATAREFS. Returns chrec_dont_know when failing to analyze a
4340 difficult case, returns NULL_TREE otherwise.
4341
4342 TODO: This function should be made smarter so that it can handle address
4343 arithmetic as if they were array accesses, etc. */
4344
4345 tree
4346 find_data_references_in_loop (struct loop *loop,
4347 VEC (data_reference_p, heap) **datarefs)
4348 {
4349 basic_block bb, *bbs;
4350 unsigned int i;
4351
4352 bbs = get_loop_body_in_dom_order (loop);
4353
4354 for (i = 0; i < loop->num_nodes; i++)
4355 {
4356 bb = bbs[i];
4357
4358 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4359 {
4360 free (bbs);
4361 return chrec_dont_know;
4362 }
4363 }
4364 free (bbs);
4365
4366 return NULL_TREE;
4367 }
4368
4369 /* Recursive helper function. */
4370
4371 static bool
4372 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4373 {
4374 /* Inner loops of the nest should not contain siblings. Example:
4375 when there are two consecutive loops,
4376
4377 | loop_0
4378 | loop_1
4379 | A[{0, +, 1}_1]
4380 | endloop_1
4381 | loop_2
4382 | A[{0, +, 1}_2]
4383 | endloop_2
4384 | endloop_0
4385
4386 the dependence relation cannot be captured by the distance
4387 abstraction. */
4388 if (loop->next)
4389 return false;
4390
4391 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4392 if (loop->inner)
4393 return find_loop_nest_1 (loop->inner, loop_nest);
4394 return true;
4395 }
4396
4397 /* Return false when the LOOP is not well nested. Otherwise return
4398 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4399 contain the loops from the outermost to the innermost, as they will
4400 appear in the classic distance vector. */
4401
4402 bool
4403 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4404 {
4405 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4406 if (loop->inner)
4407 return find_loop_nest_1 (loop->inner, loop_nest);
4408 return true;
4409 }
4410
4411 /* Returns true when the data dependences have been computed, false otherwise.
4412 Given a loop nest LOOP, the following vectors are returned:
4413 DATAREFS is initialized to all the array elements contained in this loop,
4414 DEPENDENCE_RELATIONS contains the relations between the data references.
4415 Compute read-read and self relations if
4416 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4417
4418 bool
4419 compute_data_dependences_for_loop (struct loop *loop,
4420 bool compute_self_and_read_read_dependences,
4421 VEC (loop_p, heap) **loop_nest,
4422 VEC (data_reference_p, heap) **datarefs,
4423 VEC (ddr_p, heap) **dependence_relations)
4424 {
4425 bool res = true;
4426
4427 memset (&dependence_stats, 0, sizeof (dependence_stats));
4428
4429 /* If the loop nest is not well formed, or one of the data references
4430 is not computable, give up without spending time to compute other
4431 dependences. */
4432 if (!loop
4433 || !find_loop_nest (loop, loop_nest)
4434 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4435 {
4436 struct data_dependence_relation *ddr;
4437
4438 /* Insert a single relation into dependence_relations:
4439 chrec_dont_know. */
4440 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4441 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4442 res = false;
4443 }
4444 else
4445 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4446 compute_self_and_read_read_dependences);
4447
4448 if (dump_file && (dump_flags & TDF_STATS))
4449 {
4450 fprintf (dump_file, "Dependence tester statistics:\n");
4451
4452 fprintf (dump_file, "Number of dependence tests: %d\n",
4453 dependence_stats.num_dependence_tests);
4454 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4455 dependence_stats.num_dependence_dependent);
4456 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4457 dependence_stats.num_dependence_independent);
4458 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4459 dependence_stats.num_dependence_undetermined);
4460
4461 fprintf (dump_file, "Number of subscript tests: %d\n",
4462 dependence_stats.num_subscript_tests);
4463 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4464 dependence_stats.num_subscript_undetermined);
4465 fprintf (dump_file, "Number of same subscript function: %d\n",
4466 dependence_stats.num_same_subscript_function);
4467
4468 fprintf (dump_file, "Number of ziv tests: %d\n",
4469 dependence_stats.num_ziv);
4470 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4471 dependence_stats.num_ziv_dependent);
4472 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4473 dependence_stats.num_ziv_independent);
4474 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4475 dependence_stats.num_ziv_unimplemented);
4476
4477 fprintf (dump_file, "Number of siv tests: %d\n",
4478 dependence_stats.num_siv);
4479 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4480 dependence_stats.num_siv_dependent);
4481 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4482 dependence_stats.num_siv_independent);
4483 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4484 dependence_stats.num_siv_unimplemented);
4485
4486 fprintf (dump_file, "Number of miv tests: %d\n",
4487 dependence_stats.num_miv);
4488 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4489 dependence_stats.num_miv_dependent);
4490 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4491 dependence_stats.num_miv_independent);
4492 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4493 dependence_stats.num_miv_unimplemented);
4494 }
4495
4496 return res;
4497 }
4498
4499 /* Returns true when the data dependences for the basic block BB have been
4500 computed, false otherwise.
4501 DATAREFS is initialized to all the array elements contained in this basic
4502 block, DEPENDENCE_RELATIONS contains the relations between the data
4503 references. Compute read-read and self relations if
4504 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4505 bool
4506 compute_data_dependences_for_bb (basic_block bb,
4507 bool compute_self_and_read_read_dependences,
4508 VEC (data_reference_p, heap) **datarefs,
4509 VEC (ddr_p, heap) **dependence_relations)
4510 {
4511 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4512 return false;
4513
4514 compute_all_dependences (*datarefs, dependence_relations, NULL,
4515 compute_self_and_read_read_dependences);
4516 return true;
4517 }
4518
4519 /* Entry point (for testing only). Analyze all the data references
4520 and the dependence relations in LOOP.
4521
4522 The data references are computed first.
4523
4524 A relation on these nodes is represented by a complete graph. Some
4525 of the relations could be of no interest, thus the relations can be
4526 computed on demand.
4527
4528 In the following function we compute all the relations. This is
4529 just a first implementation that is here for:
4530 - for showing how to ask for the dependence relations,
4531 - for the debugging the whole dependence graph,
4532 - for the dejagnu testcases and maintenance.
4533
4534 It is possible to ask only for a part of the graph, avoiding to
4535 compute the whole dependence graph. The computed dependences are
4536 stored in a knowledge base (KB) such that later queries don't
4537 recompute the same information. The implementation of this KB is
4538 transparent to the optimizer, and thus the KB can be changed with a
4539 more efficient implementation, or the KB could be disabled. */
4540 static void
4541 analyze_all_data_dependences (struct loop *loop)
4542 {
4543 unsigned int i;
4544 int nb_data_refs = 10;
4545 VEC (data_reference_p, heap) *datarefs =
4546 VEC_alloc (data_reference_p, heap, nb_data_refs);
4547 VEC (ddr_p, heap) *dependence_relations =
4548 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4549 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4550
4551 /* Compute DDs on the whole function. */
4552 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4553 &dependence_relations);
4554
4555 if (dump_file)
4556 {
4557 dump_data_dependence_relations (dump_file, dependence_relations);
4558 fprintf (dump_file, "\n\n");
4559
4560 if (dump_flags & TDF_DETAILS)
4561 dump_dist_dir_vectors (dump_file, dependence_relations);
4562
4563 if (dump_flags & TDF_STATS)
4564 {
4565 unsigned nb_top_relations = 0;
4566 unsigned nb_bot_relations = 0;
4567 unsigned nb_chrec_relations = 0;
4568 struct data_dependence_relation *ddr;
4569
4570 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4571 {
4572 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4573 nb_top_relations++;
4574
4575 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4576 nb_bot_relations++;
4577
4578 else
4579 nb_chrec_relations++;
4580 }
4581
4582 gather_stats_on_scev_database ();
4583 }
4584 }
4585
4586 VEC_free (loop_p, heap, loop_nest);
4587 free_dependence_relations (dependence_relations);
4588 free_data_refs (datarefs);
4589 }
4590
4591 /* Computes all the data dependences and check that the results of
4592 several analyzers are the same. */
4593
4594 void
4595 tree_check_data_deps (void)
4596 {
4597 loop_iterator li;
4598 struct loop *loop_nest;
4599
4600 FOR_EACH_LOOP (li, loop_nest, 0)
4601 analyze_all_data_dependences (loop_nest);
4602 }
4603
4604 /* Free the memory used by a data dependence relation DDR. */
4605
4606 void
4607 free_dependence_relation (struct data_dependence_relation *ddr)
4608 {
4609 if (ddr == NULL)
4610 return;
4611
4612 if (DDR_SUBSCRIPTS (ddr))
4613 free_subscripts (DDR_SUBSCRIPTS (ddr));
4614 if (DDR_DIST_VECTS (ddr))
4615 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4616 if (DDR_DIR_VECTS (ddr))
4617 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4618
4619 free (ddr);
4620 }
4621
4622 /* Free the memory used by the data dependence relations from
4623 DEPENDENCE_RELATIONS. */
4624
4625 void
4626 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4627 {
4628 unsigned int i;
4629 struct data_dependence_relation *ddr;
4630
4631 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4632 if (ddr)
4633 free_dependence_relation (ddr);
4634
4635 VEC_free (ddr_p, heap, dependence_relations);
4636 }
4637
4638 /* Free the memory used by the data references from DATAREFS. */
4639
4640 void
4641 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4642 {
4643 unsigned int i;
4644 struct data_reference *dr;
4645
4646 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4647 free_data_ref (dr);
4648 VEC_free (data_reference_p, heap, datarefs);
4649 }
4650
4651 \f
4652
4653 /* Dump vertex I in RDG to FILE. */
4654
4655 void
4656 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4657 {
4658 struct vertex *v = &(rdg->vertices[i]);
4659 struct graph_edge *e;
4660
4661 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4662 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4663 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4664
4665 if (v->pred)
4666 for (e = v->pred; e; e = e->pred_next)
4667 fprintf (file, " %d", e->src);
4668
4669 fprintf (file, ") (out:");
4670
4671 if (v->succ)
4672 for (e = v->succ; e; e = e->succ_next)
4673 fprintf (file, " %d", e->dest);
4674
4675 fprintf (file, ")\n");
4676 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4677 fprintf (file, ")\n");
4678 }
4679
4680 /* Call dump_rdg_vertex on stderr. */
4681
4682 DEBUG_FUNCTION void
4683 debug_rdg_vertex (struct graph *rdg, int i)
4684 {
4685 dump_rdg_vertex (stderr, rdg, i);
4686 }
4687
4688 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4689 dumped vertices to that bitmap. */
4690
4691 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4692 {
4693 int i;
4694
4695 fprintf (file, "(%d\n", c);
4696
4697 for (i = 0; i < rdg->n_vertices; i++)
4698 if (rdg->vertices[i].component == c)
4699 {
4700 if (dumped)
4701 bitmap_set_bit (dumped, i);
4702
4703 dump_rdg_vertex (file, rdg, i);
4704 }
4705
4706 fprintf (file, ")\n");
4707 }
4708
4709 /* Call dump_rdg_vertex on stderr. */
4710
4711 DEBUG_FUNCTION void
4712 debug_rdg_component (struct graph *rdg, int c)
4713 {
4714 dump_rdg_component (stderr, rdg, c, NULL);
4715 }
4716
4717 /* Dump the reduced dependence graph RDG to FILE. */
4718
4719 void
4720 dump_rdg (FILE *file, struct graph *rdg)
4721 {
4722 int i;
4723 bitmap dumped = BITMAP_ALLOC (NULL);
4724
4725 fprintf (file, "(rdg\n");
4726
4727 for (i = 0; i < rdg->n_vertices; i++)
4728 if (!bitmap_bit_p (dumped, i))
4729 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4730
4731 fprintf (file, ")\n");
4732 BITMAP_FREE (dumped);
4733 }
4734
4735 /* Call dump_rdg on stderr. */
4736
4737 DEBUG_FUNCTION void
4738 debug_rdg (struct graph *rdg)
4739 {
4740 dump_rdg (stderr, rdg);
4741 }
4742
4743 static void
4744 dot_rdg_1 (FILE *file, struct graph *rdg)
4745 {
4746 int i;
4747
4748 fprintf (file, "digraph RDG {\n");
4749
4750 for (i = 0; i < rdg->n_vertices; i++)
4751 {
4752 struct vertex *v = &(rdg->vertices[i]);
4753 struct graph_edge *e;
4754
4755 /* Highlight reads from memory. */
4756 if (RDG_MEM_READS_STMT (rdg, i))
4757 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4758
4759 /* Highlight stores to memory. */
4760 if (RDG_MEM_WRITE_STMT (rdg, i))
4761 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4762
4763 if (v->succ)
4764 for (e = v->succ; e; e = e->succ_next)
4765 switch (RDGE_TYPE (e))
4766 {
4767 case input_dd:
4768 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4769 break;
4770
4771 case output_dd:
4772 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4773 break;
4774
4775 case flow_dd:
4776 /* These are the most common dependences: don't print these. */
4777 fprintf (file, "%d -> %d \n", i, e->dest);
4778 break;
4779
4780 case anti_dd:
4781 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4782 break;
4783
4784 default:
4785 gcc_unreachable ();
4786 }
4787 }
4788
4789 fprintf (file, "}\n\n");
4790 }
4791
4792 /* Display the Reduced Dependence Graph using dotty. */
4793 extern void dot_rdg (struct graph *);
4794
4795 DEBUG_FUNCTION void
4796 dot_rdg (struct graph *rdg)
4797 {
4798 /* When debugging, enable the following code. This cannot be used
4799 in production compilers because it calls "system". */
4800 #if 0
4801 FILE *file = fopen ("/tmp/rdg.dot", "w");
4802 gcc_assert (file != NULL);
4803
4804 dot_rdg_1 (file, rdg);
4805 fclose (file);
4806
4807 system ("dotty /tmp/rdg.dot &");
4808 #else
4809 dot_rdg_1 (stderr, rdg);
4810 #endif
4811 }
4812
4813 /* This structure is used for recording the mapping statement index in
4814 the RDG. */
4815
4816 struct GTY(()) rdg_vertex_info
4817 {
4818 gimple stmt;
4819 int index;
4820 };
4821
4822 /* Returns the index of STMT in RDG. */
4823
4824 int
4825 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4826 {
4827 struct rdg_vertex_info rvi, *slot;
4828
4829 rvi.stmt = stmt;
4830 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4831
4832 if (!slot)
4833 return -1;
4834
4835 return slot->index;
4836 }
4837
4838 /* Creates an edge in RDG for each distance vector from DDR. The
4839 order that we keep track of in the RDG is the order in which
4840 statements have to be executed. */
4841
4842 static void
4843 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4844 {
4845 struct graph_edge *e;
4846 int va, vb;
4847 data_reference_p dra = DDR_A (ddr);
4848 data_reference_p drb = DDR_B (ddr);
4849 unsigned level = ddr_dependence_level (ddr);
4850
4851 /* For non scalar dependences, when the dependence is REVERSED,
4852 statement B has to be executed before statement A. */
4853 if (level > 0
4854 && !DDR_REVERSED_P (ddr))
4855 {
4856 data_reference_p tmp = dra;
4857 dra = drb;
4858 drb = tmp;
4859 }
4860
4861 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4862 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4863
4864 if (va < 0 || vb < 0)
4865 return;
4866
4867 e = add_edge (rdg, va, vb);
4868 e->data = XNEW (struct rdg_edge);
4869
4870 RDGE_LEVEL (e) = level;
4871 RDGE_RELATION (e) = ddr;
4872
4873 /* Determines the type of the data dependence. */
4874 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4875 RDGE_TYPE (e) = input_dd;
4876 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4877 RDGE_TYPE (e) = output_dd;
4878 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4879 RDGE_TYPE (e) = flow_dd;
4880 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4881 RDGE_TYPE (e) = anti_dd;
4882 }
4883
4884 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4885 the index of DEF in RDG. */
4886
4887 static void
4888 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4889 {
4890 use_operand_p imm_use_p;
4891 imm_use_iterator iterator;
4892
4893 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4894 {
4895 struct graph_edge *e;
4896 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4897
4898 if (use < 0)
4899 continue;
4900
4901 e = add_edge (rdg, idef, use);
4902 e->data = XNEW (struct rdg_edge);
4903 RDGE_TYPE (e) = flow_dd;
4904 RDGE_RELATION (e) = NULL;
4905 }
4906 }
4907
4908 /* Creates the edges of the reduced dependence graph RDG. */
4909
4910 static void
4911 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4912 {
4913 int i;
4914 struct data_dependence_relation *ddr;
4915 def_operand_p def_p;
4916 ssa_op_iter iter;
4917
4918 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4919 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4920 create_rdg_edge_for_ddr (rdg, ddr);
4921
4922 for (i = 0; i < rdg->n_vertices; i++)
4923 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4924 iter, SSA_OP_DEF)
4925 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4926 }
4927
4928 /* Build the vertices of the reduced dependence graph RDG. */
4929
4930 void
4931 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4932 {
4933 int i, j;
4934 gimple stmt;
4935
4936 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4937 {
4938 VEC (data_ref_loc, heap) *references;
4939 data_ref_loc *ref;
4940 struct vertex *v = &(rdg->vertices[i]);
4941 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4942 struct rdg_vertex_info **slot;
4943
4944 rvi->stmt = stmt;
4945 rvi->index = i;
4946 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4947
4948 if (!*slot)
4949 *slot = rvi;
4950 else
4951 free (rvi);
4952
4953 v->data = XNEW (struct rdg_vertex);
4954 RDG_STMT (rdg, i) = stmt;
4955
4956 RDG_MEM_WRITE_STMT (rdg, i) = false;
4957 RDG_MEM_READS_STMT (rdg, i) = false;
4958 if (gimple_code (stmt) == GIMPLE_PHI)
4959 continue;
4960
4961 get_references_in_stmt (stmt, &references);
4962 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4963 if (!ref->is_read)
4964 RDG_MEM_WRITE_STMT (rdg, i) = true;
4965 else
4966 RDG_MEM_READS_STMT (rdg, i) = true;
4967
4968 VEC_free (data_ref_loc, heap, references);
4969 }
4970 }
4971
4972 /* Initialize STMTS with all the statements of LOOP. When
4973 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4974 which we discover statements is important as
4975 generate_loops_for_partition is using the same traversal for
4976 identifying statements. */
4977
4978 static void
4979 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4980 {
4981 unsigned int i;
4982 basic_block *bbs = get_loop_body_in_dom_order (loop);
4983
4984 for (i = 0; i < loop->num_nodes; i++)
4985 {
4986 basic_block bb = bbs[i];
4987 gimple_stmt_iterator bsi;
4988 gimple stmt;
4989
4990 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4991 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4992
4993 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4994 {
4995 stmt = gsi_stmt (bsi);
4996 if (gimple_code (stmt) != GIMPLE_LABEL)
4997 VEC_safe_push (gimple, heap, *stmts, stmt);
4998 }
4999 }
5000
5001 free (bbs);
5002 }
5003
5004 /* Returns true when all the dependences are computable. */
5005
5006 static bool
5007 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5008 {
5009 ddr_p ddr;
5010 unsigned int i;
5011
5012 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5013 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5014 return false;
5015
5016 return true;
5017 }
5018
5019 /* Computes a hash function for element ELT. */
5020
5021 static hashval_t
5022 hash_stmt_vertex_info (const void *elt)
5023 {
5024 const struct rdg_vertex_info *const rvi =
5025 (const struct rdg_vertex_info *) elt;
5026 gimple stmt = rvi->stmt;
5027
5028 return htab_hash_pointer (stmt);
5029 }
5030
5031 /* Compares database elements E1 and E2. */
5032
5033 static int
5034 eq_stmt_vertex_info (const void *e1, const void *e2)
5035 {
5036 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5037 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5038
5039 return elt1->stmt == elt2->stmt;
5040 }
5041
5042 /* Free the element E. */
5043
5044 static void
5045 hash_stmt_vertex_del (void *e)
5046 {
5047 free (e);
5048 }
5049
5050 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5051 statement of the loop nest, and one edge per data dependence or
5052 scalar dependence. */
5053
5054 struct graph *
5055 build_empty_rdg (int n_stmts)
5056 {
5057 int nb_data_refs = 10;
5058 struct graph *rdg = new_graph (n_stmts);
5059
5060 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5061 eq_stmt_vertex_info, hash_stmt_vertex_del);
5062 return rdg;
5063 }
5064
5065 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5066 statement of the loop nest, and one edge per data dependence or
5067 scalar dependence. */
5068
5069 struct graph *
5070 build_rdg (struct loop *loop,
5071 VEC (loop_p, heap) **loop_nest,
5072 VEC (ddr_p, heap) **dependence_relations,
5073 VEC (data_reference_p, heap) **datarefs)
5074 {
5075 struct graph *rdg = NULL;
5076 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5077
5078 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5079 dependence_relations);
5080
5081 if (known_dependences_p (*dependence_relations))
5082 {
5083 stmts_from_loop (loop, &stmts);
5084 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5085 create_rdg_vertices (rdg, stmts);
5086 create_rdg_edges (rdg, *dependence_relations);
5087 }
5088
5089 VEC_free (gimple, heap, stmts);
5090 return rdg;
5091 }
5092
5093 /* Free the reduced dependence graph RDG. */
5094
5095 void
5096 free_rdg (struct graph *rdg)
5097 {
5098 int i;
5099
5100 for (i = 0; i < rdg->n_vertices; i++)
5101 {
5102 struct vertex *v = &(rdg->vertices[i]);
5103 struct graph_edge *e;
5104
5105 for (e = v->succ; e; e = e->succ_next)
5106 if (e->data)
5107 free (e->data);
5108
5109 if (v->data)
5110 free (v->data);
5111 }
5112
5113 htab_delete (rdg->indices);
5114 free_graph (rdg);
5115 }
5116
5117 /* Initialize STMTS with all the statements of LOOP that contain a
5118 store to memory. */
5119
5120 void
5121 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5122 {
5123 unsigned int i;
5124 basic_block *bbs = get_loop_body_in_dom_order (loop);
5125
5126 for (i = 0; i < loop->num_nodes; i++)
5127 {
5128 basic_block bb = bbs[i];
5129 gimple_stmt_iterator bsi;
5130
5131 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5132 if (gimple_vdef (gsi_stmt (bsi)))
5133 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5134 }
5135
5136 free (bbs);
5137 }
5138
5139 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5140 that contains a data reference on its LHS with a stride of the same
5141 size as its unit type. */
5142
5143 bool
5144 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5145 {
5146 tree op0, op1;
5147 bool res;
5148 struct data_reference *dr;
5149
5150 if (!stmt
5151 || !gimple_vdef (stmt)
5152 || !is_gimple_assign (stmt)
5153 || !gimple_assign_single_p (stmt)
5154 || !(op1 = gimple_assign_rhs1 (stmt))
5155 || !(integer_zerop (op1) || real_zerop (op1)))
5156 return false;
5157
5158 dr = XCNEW (struct data_reference);
5159 op0 = gimple_assign_lhs (stmt);
5160
5161 DR_STMT (dr) = stmt;
5162 DR_REF (dr) = op0;
5163
5164 res = dr_analyze_innermost (dr)
5165 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5166
5167 free_data_ref (dr);
5168 return res;
5169 }
5170
5171 /* Initialize STMTS with all the statements of LOOP that contain a
5172 store to memory of the form "A[i] = 0". */
5173
5174 void
5175 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5176 {
5177 unsigned int i;
5178 basic_block bb;
5179 gimple_stmt_iterator si;
5180 gimple stmt;
5181 basic_block *bbs = get_loop_body_in_dom_order (loop);
5182
5183 for (i = 0; i < loop->num_nodes; i++)
5184 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5185 if ((stmt = gsi_stmt (si))
5186 && stmt_with_adjacent_zero_store_dr_p (stmt))
5187 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5188
5189 free (bbs);
5190 }
5191
5192 /* For a data reference REF, return the declaration of its base
5193 address or NULL_TREE if the base is not determined. */
5194
5195 static inline tree
5196 ref_base_address (gimple stmt, data_ref_loc *ref)
5197 {
5198 tree base = NULL_TREE;
5199 tree base_address;
5200 struct data_reference *dr = XCNEW (struct data_reference);
5201
5202 DR_STMT (dr) = stmt;
5203 DR_REF (dr) = *ref->pos;
5204 dr_analyze_innermost (dr);
5205 base_address = DR_BASE_ADDRESS (dr);
5206
5207 if (!base_address)
5208 goto end;
5209
5210 switch (TREE_CODE (base_address))
5211 {
5212 case ADDR_EXPR:
5213 base = TREE_OPERAND (base_address, 0);
5214 break;
5215
5216 default:
5217 base = base_address;
5218 break;
5219 }
5220
5221 end:
5222 free_data_ref (dr);
5223 return base;
5224 }
5225
5226 /* Determines whether the statement from vertex V of the RDG has a
5227 definition used outside the loop that contains this statement. */
5228
5229 bool
5230 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5231 {
5232 gimple stmt = RDG_STMT (rdg, v);
5233 struct loop *loop = loop_containing_stmt (stmt);
5234 use_operand_p imm_use_p;
5235 imm_use_iterator iterator;
5236 ssa_op_iter it;
5237 def_operand_p def_p;
5238
5239 if (!loop)
5240 return true;
5241
5242 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5243 {
5244 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5245 {
5246 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5247 return true;
5248 }
5249 }
5250
5251 return false;
5252 }
5253
5254 /* Determines whether statements S1 and S2 access to similar memory
5255 locations. Two memory accesses are considered similar when they
5256 have the same base address declaration, i.e. when their
5257 ref_base_address is the same. */
5258
5259 bool
5260 have_similar_memory_accesses (gimple s1, gimple s2)
5261 {
5262 bool res = false;
5263 unsigned i, j;
5264 VEC (data_ref_loc, heap) *refs1, *refs2;
5265 data_ref_loc *ref1, *ref2;
5266
5267 get_references_in_stmt (s1, &refs1);
5268 get_references_in_stmt (s2, &refs2);
5269
5270 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5271 {
5272 tree base1 = ref_base_address (s1, ref1);
5273
5274 if (base1)
5275 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5276 if (base1 == ref_base_address (s2, ref2))
5277 {
5278 res = true;
5279 goto end;
5280 }
5281 }
5282
5283 end:
5284 VEC_free (data_ref_loc, heap, refs1);
5285 VEC_free (data_ref_loc, heap, refs2);
5286 return res;
5287 }
5288
5289 /* Helper function for the hashtab. */
5290
5291 static int
5292 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5293 {
5294 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5295 CONST_CAST_GIMPLE ((const_gimple) s2));
5296 }
5297
5298 /* Helper function for the hashtab. */
5299
5300 static hashval_t
5301 ref_base_address_1 (const void *s)
5302 {
5303 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5304 unsigned i;
5305 VEC (data_ref_loc, heap) *refs;
5306 data_ref_loc *ref;
5307 hashval_t res = 0;
5308
5309 get_references_in_stmt (stmt, &refs);
5310
5311 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5312 if (!ref->is_read)
5313 {
5314 res = htab_hash_pointer (ref_base_address (stmt, ref));
5315 break;
5316 }
5317
5318 VEC_free (data_ref_loc, heap, refs);
5319 return res;
5320 }
5321
5322 /* Try to remove duplicated write data references from STMTS. */
5323
5324 void
5325 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5326 {
5327 unsigned i;
5328 gimple stmt;
5329 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5330 have_similar_memory_accesses_1, NULL);
5331
5332 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5333 {
5334 void **slot;
5335
5336 slot = htab_find_slot (seen, stmt, INSERT);
5337
5338 if (*slot)
5339 VEC_ordered_remove (gimple, *stmts, i);
5340 else
5341 {
5342 *slot = (void *) stmt;
5343 i++;
5344 }
5345 }
5346
5347 htab_delete (seen);
5348 }
5349
5350 /* Returns the index of PARAMETER in the parameters vector of the
5351 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5352
5353 int
5354 access_matrix_get_index_for_parameter (tree parameter,
5355 struct access_matrix *access_matrix)
5356 {
5357 int i;
5358 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5359 tree lambda_parameter;
5360
5361 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5362 if (lambda_parameter == parameter)
5363 return i + AM_NB_INDUCTION_VARS (access_matrix);
5364
5365 return -1;
5366 }