1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to the same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
31
32 The goals of this analysis are:
33
34 - to determine independence: the relation between two
35 independent accesses is qualified with chrec_known (this
36 information enables loop parallelization),
37
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
40
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
46
47 - to define a knowledge base for storing the data dependence
48 information,
49
50 - to define an interface to access this data.
51
52
53 Definitions:
54
55 - subscript: given two array accesses, a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
64
65 References:
66
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
73
74
75 */
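/* A minimal worked example of the representation used below (added for
   exposition, not part of the original sources): for the loop

     for (i = 0; i < N; i++)
       A[i + 1] = A[i];        // read A[i], write A[i + 1]

   the access function of the read is the chrec {0, +, 1}_1 and the
   access function of the write is {1, +, 1}_1 (loop 1 being the loop
   of i).  The subscript is the pair ({1, +, 1}_1, {0, +, 1}_1); solving
   chrec1 (x) == chrec2 (y), i.e. x + 1 == y, gives the distance
   vector (1), a loop carried dependence of distance one.  */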
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
88
89 static struct datadep_stats
90 {
91 int num_dependence_tests;
92 int num_dependence_dependent;
93 int num_dependence_independent;
94 int num_dependence_undetermined;
95
96 int num_subscript_tests;
97 int num_subscript_undetermined;
98 int num_same_subscript_function;
99
100 int num_ziv;
101 int num_ziv_independent;
102 int num_ziv_dependent;
103 int num_ziv_unimplemented;
104
105 int num_siv;
106 int num_siv_independent;
107 int num_siv_dependent;
108 int num_siv_unimplemented;
109
110 int num_miv;
111 int num_miv_independent;
112 int num_miv_dependent;
113 int num_miv_unimplemented;
114 } dependence_stats;
115
116 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
117 struct data_reference *,
118 struct data_reference *,
119 struct loop *);
120 /* Returns true iff A divides B. */
121
122 static inline bool
123 tree_fold_divides_p (const_tree a, const_tree b)
124 {
125 gcc_assert (TREE_CODE (a) == INTEGER_CST);
126 gcc_assert (TREE_CODE (b) == INTEGER_CST);
127 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
128 }
129
130 /* Returns true iff A divides B. */
131
132 static inline bool
133 int_divides_p (int a, int b)
134 {
135 return ((b % a) == 0);
136 }
137
138 \f
139
140 /* Dump into FILE all the data references from DATAREFS. */
141
142 void
143 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
144 {
145 unsigned int i;
146 struct data_reference *dr;
147
148 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
149 dump_data_reference (file, dr);
150 }
151
152 /* Dump into STDERR all the data references from DATAREFS. */
153
154 DEBUG_FUNCTION void
155 debug_data_references (VEC (data_reference_p, heap) *datarefs)
156 {
157 dump_data_references (stderr, datarefs);
158 }
159
160 /* Dump to STDERR all the dependence relations from DDRS. */
161
162 DEBUG_FUNCTION void
163 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
164 {
165 dump_data_dependence_relations (stderr, ddrs);
166 }
167
168 /* Dump into FILE all the dependence relations from DDRS. */
169
170 void
171 dump_data_dependence_relations (FILE *file,
172 VEC (ddr_p, heap) *ddrs)
173 {
174 unsigned int i;
175 struct data_dependence_relation *ddr;
176
177 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
178 dump_data_dependence_relation (file, ddr);
179 }
180
181 /* Print to STDERR the data_reference DR. */
182
183 DEBUG_FUNCTION void
184 debug_data_reference (struct data_reference *dr)
185 {
186 dump_data_reference (stderr, dr);
187 }
188
189 /* Dump function for a DATA_REFERENCE structure. */
190
191 void
192 dump_data_reference (FILE *outf,
193 struct data_reference *dr)
194 {
195 unsigned int i;
196
197 fprintf (outf, "#(Data Ref: \n");
198 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
199 fprintf (outf, "# stmt: ");
200 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
201 fprintf (outf, "# ref: ");
202 print_generic_stmt (outf, DR_REF (dr), 0);
203 fprintf (outf, "# base_object: ");
204 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
205
206 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
207 {
208 fprintf (outf, "# Access function %d: ", i);
209 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
210 }
211 fprintf (outf, "#)\n");
212 }
213
214 /* Dumps the affine function described by FN to the file OUTF. */
215
216 static void
217 dump_affine_function (FILE *outf, affine_fn fn)
218 {
219 unsigned i;
220 tree coef;
221
222 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
223 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
224 {
225 fprintf (outf, " + ");
226 print_generic_expr (outf, coef, TDF_SLIM);
227 fprintf (outf, " * x_%u", i);
228 }
229 }
230
231 /* Dumps the conflict function CF to the file OUTF. */
232
233 static void
234 dump_conflict_function (FILE *outf, conflict_function *cf)
235 {
236 unsigned i;
237
238 if (cf->n == NO_DEPENDENCE)
239 fprintf (outf, "no dependence\n");
240 else if (cf->n == NOT_KNOWN)
241 fprintf (outf, "not known\n");
242 else
243 {
244 for (i = 0; i < cf->n; i++)
245 {
246 fprintf (outf, "[");
247 dump_affine_function (outf, cf->fns[i]);
248 fprintf (outf, "]\n");
249 }
250 }
251 }
252
253 /* Dump function for a SUBSCRIPT structure. */
254
255 void
256 dump_subscript (FILE *outf, struct subscript *subscript)
257 {
258 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
259
260 fprintf (outf, "\n (subscript \n");
261 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
262 dump_conflict_function (outf, cf);
263 if (CF_NONTRIVIAL_P (cf))
264 {
265 tree last_iteration = SUB_LAST_CONFLICT (subscript);
266 fprintf (outf, " last_conflict: ");
267 print_generic_stmt (outf, last_iteration, 0);
268 }
269
270 cf = SUB_CONFLICTS_IN_B (subscript);
271 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
272 dump_conflict_function (outf, cf);
273 if (CF_NONTRIVIAL_P (cf))
274 {
275 tree last_iteration = SUB_LAST_CONFLICT (subscript);
276 fprintf (outf, " last_conflict: ");
277 print_generic_stmt (outf, last_iteration, 0);
278 }
279
280 fprintf (outf, " (Subscript distance: ");
281 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
282 fprintf (outf, " )\n");
283 fprintf (outf, " )\n");
284 }
285
286 /* Print the classic direction vector DIRV to OUTF. */
287
288 void
289 print_direction_vector (FILE *outf,
290 lambda_vector dirv,
291 int length)
292 {
293 int eq;
294
295 for (eq = 0; eq < length; eq++)
296 {
297 enum data_dependence_direction dir = ((enum data_dependence_direction)
298 dirv[eq]);
299
300 switch (dir)
301 {
302 case dir_positive:
303 fprintf (outf, " +");
304 break;
305 case dir_negative:
306 fprintf (outf, " -");
307 break;
308 case dir_equal:
309 fprintf (outf, " =");
310 break;
311 case dir_positive_or_equal:
312 fprintf (outf, " +=");
313 break;
314 case dir_positive_or_negative:
315 fprintf (outf, " +-");
316 break;
317 case dir_negative_or_equal:
318 fprintf (outf, " -=");
319 break;
320 case dir_star:
321 fprintf (outf, " *");
322 break;
323 default:
324 fprintf (outf, "indep");
325 break;
326 }
327 }
328 fprintf (outf, "\n");
329 }
330
331 /* Print a vector of direction vectors. */
332
333 void
334 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
335 int length)
336 {
337 unsigned j;
338 lambda_vector v;
339
340 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
341 print_direction_vector (outf, v, length);
342 }
343
344 /* Print out the lambda vector VECTOR of length N to OUTFILE. */
345
346 static inline void
347 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
348 {
349 int i;
350
351 for (i = 0; i < n; i++)
352 fprintf (outfile, "%3d ", vector[i]);
353 fprintf (outfile, "\n");
354 }
355
356 /* Print a vector of distance vectors. */
357
358 void
359 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
360 int length)
361 {
362 unsigned j;
363 lambda_vector v;
364
365 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
366 print_lambda_vector (outf, v, length);
367 }
368
369 /* Debug version. */
370
371 DEBUG_FUNCTION void
372 debug_data_dependence_relation (struct data_dependence_relation *ddr)
373 {
374 dump_data_dependence_relation (stderr, ddr);
375 }
376
377 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
378
379 void
380 dump_data_dependence_relation (FILE *outf,
381 struct data_dependence_relation *ddr)
382 {
383 struct data_reference *dra, *drb;
384
385 fprintf (outf, "(Data Dep: \n");
386
387 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
388 {
389 if (ddr)
390 {
391 dra = DDR_A (ddr);
392 drb = DDR_B (ddr);
393 if (dra)
394 dump_data_reference (outf, dra);
395 else
396 fprintf (outf, " (nil)\n");
397 if (drb)
398 dump_data_reference (outf, drb);
399 else
400 fprintf (outf, " (nil)\n");
401 }
402 fprintf (outf, " (don't know)\n)\n");
403 return;
404 }
405
406 dra = DDR_A (ddr);
407 drb = DDR_B (ddr);
408 dump_data_reference (outf, dra);
409 dump_data_reference (outf, drb);
410
411 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
412 fprintf (outf, " (no dependence)\n");
413
414 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
415 {
416 unsigned int i;
417 struct loop *loopi;
418
419 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
420 {
421 fprintf (outf, " access_fn_A: ");
422 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
423 fprintf (outf, " access_fn_B: ");
424 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
425 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
426 }
427
428 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
429 fprintf (outf, " loop nest: (");
430 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
431 fprintf (outf, "%d ", loopi->num);
432 fprintf (outf, ")\n");
433
434 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
435 {
436 fprintf (outf, " distance_vector: ");
437 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
438 DDR_NB_LOOPS (ddr));
439 }
440
441 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
442 {
443 fprintf (outf, " direction_vector: ");
444 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
445 DDR_NB_LOOPS (ddr));
446 }
447 }
448
449 fprintf (outf, ")\n");
450 }
451
452 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
453
454 void
455 dump_data_dependence_direction (FILE *file,
456 enum data_dependence_direction dir)
457 {
458 switch (dir)
459 {
460 case dir_positive:
461 fprintf (file, "+");
462 break;
463
464 case dir_negative:
465 fprintf (file, "-");
466 break;
467
468 case dir_equal:
469 fprintf (file, "=");
470 break;
471
472 case dir_positive_or_negative:
473 fprintf (file, "+-");
474 break;
475
476 case dir_positive_or_equal:
477 fprintf (file, "+=");
478 break;
479
480 case dir_negative_or_equal:
481 fprintf (file, "-=");
482 break;
483
484 case dir_star:
485 fprintf (file, "*");
486 break;
487
488 default:
489 break;
490 }
491 }
492
493 /* Dumps the distance and direction vectors in FILE.  DDRS contains
494 the dependence relations; the length of each dependence
495 vector equals the number of loops in the
496 considered nest. */
497
498 void
499 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
500 {
501 unsigned int i, j;
502 struct data_dependence_relation *ddr;
503 lambda_vector v;
504
505 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
506 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
507 {
508 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
509 {
510 fprintf (file, "DISTANCE_V (");
511 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
512 fprintf (file, ")\n");
513 }
514
515 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
516 {
517 fprintf (file, "DIRECTION_V (");
518 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
519 fprintf (file, ")\n");
520 }
521 }
522
523 fprintf (file, "\n\n");
524 }
525
526 /* Dumps the data dependence relations DDRS in FILE. */
527
528 void
529 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
530 {
531 unsigned int i;
532 struct data_dependence_relation *ddr;
533
534 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
535 dump_data_dependence_relation (file, ddr);
536
537 fprintf (file, "\n\n");
538 }
539
540 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
541 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
542 constant of type ssizetype, and returns true. If we cannot do this
543 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
544 is returned. */
545
546 static bool
547 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
548 tree *var, tree *off)
549 {
550 tree var0, var1;
551 tree off0, off1;
552 enum tree_code ocode = code;
553
554 *var = NULL_TREE;
555 *off = NULL_TREE;
556
557 switch (code)
558 {
559 case INTEGER_CST:
560 *var = build_int_cst (type, 0);
561 *off = fold_convert (ssizetype, op0);
562 return true;
563
564 case POINTER_PLUS_EXPR:
565 ocode = PLUS_EXPR;
566 /* FALLTHROUGH */
567 case PLUS_EXPR:
568 case MINUS_EXPR:
569 split_constant_offset (op0, &var0, &off0);
570 split_constant_offset (op1, &var1, &off1);
571 *var = fold_build2 (code, type, var0, var1);
572 *off = size_binop (ocode, off0, off1);
573 return true;
574
575 case MULT_EXPR:
576 if (TREE_CODE (op1) != INTEGER_CST)
577 return false;
578
579 split_constant_offset (op0, &var0, &off0);
580 *var = fold_build2 (MULT_EXPR, type, var0, op1);
581 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
582 return true;
583
584 case ADDR_EXPR:
585 {
586 tree base, poffset;
587 HOST_WIDE_INT pbitsize, pbitpos;
588 enum machine_mode pmode;
589 int punsignedp, pvolatilep;
590
591 op0 = TREE_OPERAND (op0, 0);
592 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
593 &pmode, &punsignedp, &pvolatilep, false);
594
595 if (pbitpos % BITS_PER_UNIT != 0)
596 return false;
597 base = build_fold_addr_expr (base);
598 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
599
600 if (poffset)
601 {
602 split_constant_offset (poffset, &poffset, &off1);
603 off0 = size_binop (PLUS_EXPR, off0, off1);
604 if (POINTER_TYPE_P (TREE_TYPE (base)))
605 base = fold_build_pointer_plus (base, poffset);
606 else
607 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
608 fold_convert (TREE_TYPE (base), poffset));
609 }
610
611 var0 = fold_convert (type, base);
612
613 /* If variable length types are involved, punt; otherwise casts
614 might be converted into ARRAY_REFs in gimplify_conversion.
615 To compute such an ARRAY_REF's element size, TYPE_SIZE_UNIT, which
616 possibly no longer appears in the current GIMPLE, might resurface.
617 This perhaps could run
618 if (CONVERT_EXPR_P (var0))
619 {
620 gimplify_conversion (&var0);
621 // Attempt to fill in the element size of any ARRAY_REF
622 // found within var0 from the corresponding ARRAY_REF
623 // embedded in op; if unsuccessful, just punt.
624 } */
625 while (POINTER_TYPE_P (type))
626 type = TREE_TYPE (type);
627 if (int_size_in_bytes (type) < 0)
628 return false;
629
630 *var = var0;
631 *off = off0;
632 return true;
633 }
634
635 case SSA_NAME:
636 {
637 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
638 enum tree_code subcode;
639
640 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
641 return false;
642
643 var0 = gimple_assign_rhs1 (def_stmt);
644 subcode = gimple_assign_rhs_code (def_stmt);
645 var1 = gimple_assign_rhs2 (def_stmt);
646
647 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
648 }
649 CASE_CONVERT:
650 {
651 /* We must not introduce undefined overflow, and we must not change the value.
652 Hence we're okay if the inner type doesn't overflow to start with
653 (pointer or signed), the outer type is also an integer or pointer,
654 and the outer precision is at least as large as the inner. */
655 tree itype = TREE_TYPE (op0);
656 if ((POINTER_TYPE_P (itype)
657 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
658 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
659 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
660 {
661 split_constant_offset (op0, &var0, off);
662 *var = fold_convert (type, var0);
663 return true;
664 }
665 return false;
666 }
667
668 default:
669 return false;
670 }
671 }
672
673 /* Expresses EXP as VAR + OFF, where OFF is a constant.  The type of OFF
674 will be ssizetype. */
675
676 void
677 split_constant_offset (tree exp, tree *var, tree *off)
678 {
679 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
680 enum tree_code code;
681
682 *var = exp;
683 *off = ssize_int (0);
684 STRIP_NOPS (exp);
685
686 if (tree_is_chrec (exp)
687 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
688 return;
689
690 otype = TREE_TYPE (exp);
691 code = TREE_CODE (exp);
692 extract_ops_from_tree (exp, &code, &op0, &op1);
693 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
694 {
695 *var = fold_convert (type, e);
696 *off = o;
697 }
698 }
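/* A small usage sketch of split_constant_offset (an illustration only;
   the concrete trees depend on the GIMPLE being analyzed):

     tree var, off;
     split_constant_offset (exp, &var, &off);

   For an EXP of the form "p_1 + 8" this is expected to yield VAR == p_1
   and OFF == 8 (as an ssizetype constant); for a plain SSA name with no
   analyzable definition, or for a chrec, it falls back to VAR == EXP and
   OFF == 0.  */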
699
700 /* Returns the address ADDR of an object in a canonical shape (without nop
701 casts, and with the type of a pointer to the object). */
702
703 static tree
704 canonicalize_base_object_address (tree addr)
705 {
706 tree orig = addr;
707
708 STRIP_NOPS (addr);
709
710 /* The base address may be obtained by casting from integer, in that case
711 keep the cast. */
712 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
713 return orig;
714
715 if (TREE_CODE (addr) != ADDR_EXPR)
716 return addr;
717
718 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
719 }
720
721 /* Analyzes the behavior of the memory reference DR in the innermost loop or
722 basic block that contains it.  Returns true if the analysis succeeds,
723 false otherwise. */
724
725 bool
726 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
727 {
728 gimple stmt = DR_STMT (dr);
729 struct loop *loop = loop_containing_stmt (stmt);
730 tree ref = DR_REF (dr);
731 HOST_WIDE_INT pbitsize, pbitpos;
732 tree base, poffset;
733 enum machine_mode pmode;
734 int punsignedp, pvolatilep;
735 affine_iv base_iv, offset_iv;
736 tree init, dinit, step;
737 bool in_loop = (loop && loop->num);
738
739 if (dump_file && (dump_flags & TDF_DETAILS))
740 fprintf (dump_file, "analyze_innermost: ");
741
742 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
743 &pmode, &punsignedp, &pvolatilep, false);
744 gcc_assert (base != NULL_TREE);
745
746 if (pbitpos % BITS_PER_UNIT != 0)
747 {
748 if (dump_file && (dump_flags & TDF_DETAILS))
749 fprintf (dump_file, "failed: bit offset alignment.\n");
750 return false;
751 }
752
753 if (TREE_CODE (base) == MEM_REF)
754 {
755 if (!integer_zerop (TREE_OPERAND (base, 1)))
756 {
757 if (!poffset)
758 {
759 double_int moff = mem_ref_offset (base);
760 poffset = double_int_to_tree (sizetype, moff);
761 }
762 else
763 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
764 }
765 base = TREE_OPERAND (base, 0);
766 }
767 else
768 base = build_fold_addr_expr (base);
769
770 if (in_loop)
771 {
772 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
773 false))
774 {
775 if (nest)
776 {
777 if (dump_file && (dump_flags & TDF_DETAILS))
778 fprintf (dump_file, "failed: evolution of base is not"
779 " affine.\n");
780 return false;
781 }
782 else
783 {
784 base_iv.base = base;
785 base_iv.step = ssize_int (0);
786 base_iv.no_overflow = true;
787 }
788 }
789 }
790 else
791 {
792 base_iv.base = base;
793 base_iv.step = ssize_int (0);
794 base_iv.no_overflow = true;
795 }
796
797 if (!poffset)
798 {
799 offset_iv.base = ssize_int (0);
800 offset_iv.step = ssize_int (0);
801 }
802 else
803 {
804 if (!in_loop)
805 {
806 offset_iv.base = poffset;
807 offset_iv.step = ssize_int (0);
808 }
809 else if (!simple_iv (loop, loop_containing_stmt (stmt),
810 poffset, &offset_iv, false))
811 {
812 if (nest)
813 {
814 if (dump_file && (dump_flags & TDF_DETAILS))
815 fprintf (dump_file, "failed: evolution of offset is not"
816 " affine.\n");
817 return false;
818 }
819 else
820 {
821 offset_iv.base = poffset;
822 offset_iv.step = ssize_int (0);
823 }
824 }
825 }
826
827 init = ssize_int (pbitpos / BITS_PER_UNIT);
828 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
829 init = size_binop (PLUS_EXPR, init, dinit);
830 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
831 init = size_binop (PLUS_EXPR, init, dinit);
832
833 step = size_binop (PLUS_EXPR,
834 fold_convert (ssizetype, base_iv.step),
835 fold_convert (ssizetype, offset_iv.step));
836
837 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
838
839 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
840 DR_INIT (dr) = init;
841 DR_STEP (dr) = step;
842
843 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
844
845 if (dump_file && (dump_flags & TDF_DETAILS))
846 fprintf (dump_file, "success.\n");
847
848 return true;
849 }
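/* As an illustration of the fields filled in above (assuming a 4-byte
   element type; the exact trees are target dependent): for the reference
   a[i + 3] in a loop whose induction variable i starts at 0 and steps
   by 1, one would typically obtain

     DR_BASE_ADDRESS = &a
     DR_OFFSET       = 0
     DR_INIT         = 12
     DR_STEP         = 4

   i.e. the address decomposes as &a + 0 + 12 + 4 * iteration.  */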
850
851 /* Determines the base object and the list of indices of memory reference
852 DR, analyzed in LOOP and instantiated in loop nest NEST. */
853
854 static void
855 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
856 {
857 VEC (tree, heap) *access_fns = NULL;
858 tree ref, aref, op;
859 tree base, off, access_fn;
860 basic_block before_loop;
861
862 /* If analyzing a basic-block there are no indices to analyze
863 and thus no access functions. */
864 if (!nest)
865 {
866 DR_BASE_OBJECT (dr) = DR_REF (dr);
867 DR_ACCESS_FNS (dr) = NULL;
868 return;
869 }
870
871 ref = unshare_expr (DR_REF (dr));
872 before_loop = block_before_loop (nest);
873
874 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
875 into a two-element array with a constant index.  The base is
876 then just the immediate underlying object. */
877 if (TREE_CODE (ref) == REALPART_EXPR)
878 {
879 ref = TREE_OPERAND (ref, 0);
880 VEC_safe_push (tree, heap, access_fns, integer_zero_node);
881 }
882 else if (TREE_CODE (ref) == IMAGPART_EXPR)
883 {
884 ref = TREE_OPERAND (ref, 0);
885 VEC_safe_push (tree, heap, access_fns, integer_one_node);
886 }
887
888 /* Analyze access functions of dimensions we know to be independent. */
889 aref = ref;
890 while (handled_component_p (aref))
891 {
892 if (TREE_CODE (aref) == ARRAY_REF)
893 {
894 op = TREE_OPERAND (aref, 1);
895 access_fn = analyze_scalar_evolution (loop, op);
896 access_fn = instantiate_scev (before_loop, loop, access_fn);
897 VEC_safe_push (tree, heap, access_fns, access_fn);
898 /* For ARRAY_REFs the base is the reference with the index replaced
899 by zero if we cannot strip it as the outermost component. */
900 if (aref == ref)
901 ref = TREE_OPERAND (ref, 0);
902 else
903 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
904 }
905
906 aref = TREE_OPERAND (aref, 0);
907 }
908
909 /* If the address operand of a MEM_REF base has an evolution in the
910 analyzed nest, add it as an additional independent access-function. */
911 if (TREE_CODE (aref) == MEM_REF)
912 {
913 op = TREE_OPERAND (aref, 0);
914 access_fn = analyze_scalar_evolution (loop, op);
915 access_fn = instantiate_scev (before_loop, loop, access_fn);
916 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
917 {
918 base = initial_condition (access_fn);
919 split_constant_offset (base, &base, &off);
920 /* Fold the MEM_REF offset into the evolution's initial
921 value to make more bases comparable. */
922 if (!integer_zerop (TREE_OPERAND (aref, 1)))
923 {
924 off = size_binop (PLUS_EXPR, off,
925 fold_convert (ssizetype,
926 TREE_OPERAND (aref, 1)));
927 TREE_OPERAND (aref, 1)
928 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
929 }
930 access_fn = chrec_replace_initial_condition
931 (access_fn, fold_convert (TREE_TYPE (base), off));
932 TREE_OPERAND (aref, 0) = base;
933 VEC_safe_push (tree, heap, access_fns, access_fn);
934 }
935 }
936
937 DR_BASE_OBJECT (dr) = ref;
938 DR_ACCESS_FNS (dr) = access_fns;
939 }
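/* For illustration (not part of the original sources): for a reference
   a[i][2 * j] in a nest with induction variables i and j, DR_BASE_OBJECT
   becomes "a" and the access functions are

     DR_ACCESS_FN (dr, 0) = {0, +, 2} in the loop of j   (innermost dimension)
     DR_ACCESS_FN (dr, 1) = {0, +, 1} in the loop of i   (outer dimension)

   i.e. one chrec per analyzed ARRAY_REF dimension, innermost subscript
   first.  */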
940
941 /* Extracts the alias analysis information from the memory reference DR. */
942
943 static void
944 dr_analyze_alias (struct data_reference *dr)
945 {
946 tree ref = DR_REF (dr);
947 tree base = get_base_address (ref), addr;
948
949 if (INDIRECT_REF_P (base)
950 || TREE_CODE (base) == MEM_REF)
951 {
952 addr = TREE_OPERAND (base, 0);
953 if (TREE_CODE (addr) == SSA_NAME)
954 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
955 }
956 }
957
958 /* Frees data reference DR. */
959
960 void
961 free_data_ref (data_reference_p dr)
962 {
963 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
964 free (dr);
965 }
966
967 /* Analyzes memory reference MEMREF accessed in STMT. The reference
968 is read if IS_READ is true, a write otherwise.  Returns the
969 data_reference description of MEMREF. NEST is the outermost loop
970 in which the reference should be instantiated, LOOP is the loop in
971 which the data reference should be analyzed. */
972
973 struct data_reference *
974 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
975 bool is_read)
976 {
977 struct data_reference *dr;
978
979 if (dump_file && (dump_flags & TDF_DETAILS))
980 {
981 fprintf (dump_file, "Creating dr for ");
982 print_generic_expr (dump_file, memref, TDF_SLIM);
983 fprintf (dump_file, "\n");
984 }
985
986 dr = XCNEW (struct data_reference);
987 DR_STMT (dr) = stmt;
988 DR_REF (dr) = memref;
989 DR_IS_READ (dr) = is_read;
990
991 dr_analyze_innermost (dr, nest);
992 dr_analyze_indices (dr, nest, loop);
993 dr_analyze_alias (dr);
994
995 if (dump_file && (dump_flags & TDF_DETAILS))
996 {
997 unsigned i;
998 fprintf (dump_file, "\tbase_address: ");
999 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1000 fprintf (dump_file, "\n\toffset from base address: ");
1001 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1002 fprintf (dump_file, "\n\tconstant offset from base address: ");
1003 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1004 fprintf (dump_file, "\n\tstep: ");
1005 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1006 fprintf (dump_file, "\n\taligned to: ");
1007 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1008 fprintf (dump_file, "\n\tbase_object: ");
1009 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1010 fprintf (dump_file, "\n");
1011 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1012 {
1013 fprintf (dump_file, "\tAccess function %d: ", i);
1014 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1015 }
1016 }
1017
1018 return dr;
1019 }
1020
1021 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1022 expressions. */
1023 static bool
1024 dr_equal_offsets_p1 (tree offset1, tree offset2)
1025 {
1026 bool res;
1027
1028 STRIP_NOPS (offset1);
1029 STRIP_NOPS (offset2);
1030
1031 if (offset1 == offset2)
1032 return true;
1033
1034 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1035 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1036 return false;
1037
1038 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1039 TREE_OPERAND (offset2, 0));
1040
1041 if (!res || !BINARY_CLASS_P (offset1))
1042 return res;
1043
1044 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1045 TREE_OPERAND (offset2, 1));
1046
1047 return res;
1048 }
1049
1050 /* Check if DRA and DRB have equal offsets. */
1051 bool
1052 dr_equal_offsets_p (struct data_reference *dra,
1053 struct data_reference *drb)
1054 {
1055 tree offset1, offset2;
1056
1057 offset1 = DR_OFFSET (dra);
1058 offset2 = DR_OFFSET (drb);
1059
1060 return dr_equal_offsets_p1 (offset1, offset2);
1061 }
1062
1063 /* Returns true if FNA == FNB. */
1064
1065 static bool
1066 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1067 {
1068 unsigned i, n = VEC_length (tree, fna);
1069
1070 if (n != VEC_length (tree, fnb))
1071 return false;
1072
1073 for (i = 0; i < n; i++)
1074 if (!operand_equal_p (VEC_index (tree, fna, i),
1075 VEC_index (tree, fnb, i), 0))
1076 return false;
1077
1078 return true;
1079 }
1080
1081 /* If all the functions in CF are the same, returns one of them,
1082 otherwise returns NULL. */
1083
1084 static affine_fn
1085 common_affine_function (conflict_function *cf)
1086 {
1087 unsigned i;
1088 affine_fn comm;
1089
1090 if (!CF_NONTRIVIAL_P (cf))
1091 return NULL;
1092
1093 comm = cf->fns[0];
1094
1095 for (i = 1; i < cf->n; i++)
1096 if (!affine_function_equal_p (comm, cf->fns[i]))
1097 return NULL;
1098
1099 return comm;
1100 }
1101
1102 /* Returns the base of the affine function FN. */
1103
1104 static tree
1105 affine_function_base (affine_fn fn)
1106 {
1107 return VEC_index (tree, fn, 0);
1108 }
1109
1110 /* Returns true if FN is a constant. */
1111
1112 static bool
1113 affine_function_constant_p (affine_fn fn)
1114 {
1115 unsigned i;
1116 tree coef;
1117
1118 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1119 if (!integer_zerop (coef))
1120 return false;
1121
1122 return true;
1123 }
1124
1125 /* Returns true if FN is the zero constant function. */
1126
1127 static bool
1128 affine_function_zero_p (affine_fn fn)
1129 {
1130 return (integer_zerop (affine_function_base (fn))
1131 && affine_function_constant_p (fn));
1132 }
1133
1134 /* Returns a signed integer type with the larger of the precisions
1135 of TA and TB. */
1136
1137 static tree
1138 signed_type_for_types (tree ta, tree tb)
1139 {
1140 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1141 return signed_type_for (ta);
1142 else
1143 return signed_type_for (tb);
1144 }
1145
1146 /* Applies operation OP on affine functions FNA and FNB, and returns the
1147 result. */
1148
1149 static affine_fn
1150 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1151 {
1152 unsigned i, n, m;
1153 affine_fn ret;
1154 tree coef;
1155
1156 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1157 {
1158 n = VEC_length (tree, fna);
1159 m = VEC_length (tree, fnb);
1160 }
1161 else
1162 {
1163 n = VEC_length (tree, fnb);
1164 m = VEC_length (tree, fna);
1165 }
1166
1167 ret = VEC_alloc (tree, heap, m);
1168 for (i = 0; i < n; i++)
1169 {
1170 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1171 TREE_TYPE (VEC_index (tree, fnb, i)));
1172
1173 VEC_quick_push (tree, ret,
1174 fold_build2 (op, type,
1175 VEC_index (tree, fna, i),
1176 VEC_index (tree, fnb, i)));
1177 }
1178
1179 for (; VEC_iterate (tree, fna, i, coef); i++)
1180 VEC_quick_push (tree, ret,
1181 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1182 coef, integer_zero_node));
1183 for (; VEC_iterate (tree, fnb, i, coef); i++)
1184 VEC_quick_push (tree, ret,
1185 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1186 integer_zero_node, coef));
1187
1188 return ret;
1189 }
1190
1191 /* Returns the sum of affine functions FNA and FNB. */
1192
1193 static affine_fn
1194 affine_fn_plus (affine_fn fna, affine_fn fnb)
1195 {
1196 return affine_fn_op (PLUS_EXPR, fna, fnb);
1197 }
1198
1199 /* Returns the difference of affine functions FNA and FNB. */
1200
1201 static affine_fn
1202 affine_fn_minus (affine_fn fna, affine_fn fnb)
1203 {
1204 return affine_fn_op (MINUS_EXPR, fna, fnb);
1205 }
1206
1207 /* Frees affine function FN. */
1208
1209 static void
1210 affine_fn_free (affine_fn fn)
1211 {
1212 VEC_free (tree, heap, fn);
1213 }
1214
1215 /* Determine the distance for each subscript in the data dependence
1216 relation DDR. */
1217
1218 static void
1219 compute_subscript_distance (struct data_dependence_relation *ddr)
1220 {
1221 conflict_function *cf_a, *cf_b;
1222 affine_fn fn_a, fn_b, diff;
1223
1224 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1225 {
1226 unsigned int i;
1227
1228 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1229 {
1230 struct subscript *subscript;
1231
1232 subscript = DDR_SUBSCRIPT (ddr, i);
1233 cf_a = SUB_CONFLICTS_IN_A (subscript);
1234 cf_b = SUB_CONFLICTS_IN_B (subscript);
1235
1236 fn_a = common_affine_function (cf_a);
1237 fn_b = common_affine_function (cf_b);
1238 if (!fn_a || !fn_b)
1239 {
1240 SUB_DISTANCE (subscript) = chrec_dont_know;
1241 return;
1242 }
1243 diff = affine_fn_minus (fn_a, fn_b);
1244
1245 if (affine_function_constant_p (diff))
1246 SUB_DISTANCE (subscript) = affine_function_base (diff);
1247 else
1248 SUB_DISTANCE (subscript) = chrec_dont_know;
1249
1250 affine_fn_free (diff);
1251 }
1252 }
1253 }
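/* Worked example (illustration only): if the common affine functions of
   a subscript are fn_a = 2 + 1 * x_1 and fn_b = 5 + 1 * x_1, then
   diff = fn_a - fn_b = -3, which is constant, so SUB_DISTANCE is set
   to -3.  If the coefficients differed, diff would not be constant and
   SUB_DISTANCE would be chrec_dont_know.  */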
1254
1255 /* Returns the conflict function for "unknown". */
1256
1257 static conflict_function *
1258 conflict_fn_not_known (void)
1259 {
1260 conflict_function *fn = XCNEW (conflict_function);
1261 fn->n = NOT_KNOWN;
1262
1263 return fn;
1264 }
1265
1266 /* Returns the conflict function for "independent". */
1267
1268 static conflict_function *
1269 conflict_fn_no_dependence (void)
1270 {
1271 conflict_function *fn = XCNEW (conflict_function);
1272 fn->n = NO_DEPENDENCE;
1273
1274 return fn;
1275 }
1276
1277 /* Returns true if the address of OBJ is invariant in LOOP. */
1278
1279 static bool
1280 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1281 {
1282 while (handled_component_p (obj))
1283 {
1284 if (TREE_CODE (obj) == ARRAY_REF)
1285 {
1286 /* The index of the ARRAY_REF was zeroed in dr_analyze_indices, thus we only
1287 need to check the stride and the lower bound of the reference. */
1288 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1289 loop->num)
1290 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1291 loop->num))
1292 return false;
1293 }
1294 else if (TREE_CODE (obj) == COMPONENT_REF)
1295 {
1296 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1297 loop->num))
1298 return false;
1299 }
1300 obj = TREE_OPERAND (obj, 0);
1301 }
1302
1303 if (!INDIRECT_REF_P (obj)
1304 && TREE_CODE (obj) != MEM_REF)
1305 return true;
1306
1307 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1308 loop->num);
1309 }
1310
1311 /* Returns false if we can prove that data references A and B do not alias,
1312 true otherwise.  If LOOP_NEST is false, no cross-iteration aliases are
1313 considered. */
1314
1315 bool
1316 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1317 bool loop_nest)
1318 {
1319 tree addr_a = DR_BASE_OBJECT (a);
1320 tree addr_b = DR_BASE_OBJECT (b);
1321
1322 /* If we are not processing a loop nest but scalar code we
1323 do not need to care about possible cross-iteration dependences
1324 and thus can process the full original reference. Do so,
1325 similar to how loop invariant motion applies extra offset-based
1326 disambiguation. */
1327 if (!loop_nest)
1328 {
1329 aff_tree off1, off2;
1330 double_int size1, size2;
1331 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1332 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1333 aff_combination_scale (&off1, double_int_minus_one);
1334 aff_combination_add (&off2, &off1);
1335 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1336 return false;
1337 }
1338
1339 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1340 return refs_output_dependent_p (addr_a, addr_b);
1341 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1342 return refs_anti_dependent_p (addr_a, addr_b);
1343 return refs_may_alias_p (addr_a, addr_b);
1344 }
1345
1346 static void compute_self_dependence (struct data_dependence_relation *);
1347
1348 /* Initialize a data dependence relation between data accesses A and
1349 B.  LOOP_NEST is the vector of loops surrounding the references; its
1350 length gives the size of the classic distance/direction vectors. */
1351
1352 static struct data_dependence_relation *
1353 initialize_data_dependence_relation (struct data_reference *a,
1354 struct data_reference *b,
1355 VEC (loop_p, heap) *loop_nest)
1356 {
1357 struct data_dependence_relation *res;
1358 unsigned int i;
1359
1360 res = XNEW (struct data_dependence_relation);
1361 DDR_A (res) = a;
1362 DDR_B (res) = b;
1363 DDR_LOOP_NEST (res) = NULL;
1364 DDR_REVERSED_P (res) = false;
1365 DDR_SUBSCRIPTS (res) = NULL;
1366 DDR_DIR_VECTS (res) = NULL;
1367 DDR_DIST_VECTS (res) = NULL;
1368
1369 if (a == NULL || b == NULL)
1370 {
1371 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1372 return res;
1373 }
1374
1375 /* If the data references do not alias, then they are independent. */
1376 if (!dr_may_alias_p (a, b, loop_nest != NULL))
1377 {
1378 DDR_ARE_DEPENDENT (res) = chrec_known;
1379 return res;
1380 }
1381
1382 /* When the references are exactly the same, don't spend time doing
1383 the data dependence tests, just initialize the ddr and return. */
1384 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1385 {
1386 DDR_AFFINE_P (res) = true;
1387 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1388 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1389 DDR_LOOP_NEST (res) = loop_nest;
1390 DDR_INNER_LOOP (res) = 0;
1391 DDR_SELF_REFERENCE (res) = true;
1392 compute_self_dependence (res);
1393 return res;
1394 }
1395
1396 /* If the references do not access the same object, we do not know
1397 whether they alias or not. */
1398 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1399 {
1400 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1401 return res;
1402 }
1403
1404 /* If the base of the object is not invariant in the loop nest, we cannot
1405 analyze it. TODO -- in fact, it would suffice to record that there may
1406 be arbitrary dependences in the loops where the base object varies. */
1407 if (loop_nest
1408 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1409 DR_BASE_OBJECT (a)))
1410 {
1411 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1412 return res;
1413 }
1414
1415 /* If the numbers of dimensions of the accesses do not agree, we can have
1416 a pointer access to a component of the array element type and an
1417 array access while the base-objects are still the same. Punt. */
1418 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1419 {
1420 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1421 return res;
1422 }
1423
1424 DDR_AFFINE_P (res) = true;
1425 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1426 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1427 DDR_LOOP_NEST (res) = loop_nest;
1428 DDR_INNER_LOOP (res) = 0;
1429 DDR_SELF_REFERENCE (res) = false;
1430
1431 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1432 {
1433 struct subscript *subscript;
1434
1435 subscript = XNEW (struct subscript);
1436 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1437 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1438 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1439 SUB_DISTANCE (subscript) = chrec_dont_know;
1440 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1441 }
1442
1443 return res;
1444 }
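/* Summary of the three states of DDR_ARE_DEPENDENT set above (added for
   exposition): chrec_known means the accesses are proved independent
   (e.g. they cannot alias), chrec_dont_know means the relation could not
   be analyzed (e.g. different base objects or numbers of dimensions),
   and NULL_TREE means the subscripts were set up and the dependence
   tests below still have to run.  */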
1445
1446 /* Frees memory used by the conflict function F. */
1447
1448 static void
1449 free_conflict_function (conflict_function *f)
1450 {
1451 unsigned i;
1452
1453 if (CF_NONTRIVIAL_P (f))
1454 {
1455 for (i = 0; i < f->n; i++)
1456 affine_fn_free (f->fns[i]);
1457 }
1458 free (f);
1459 }
1460
1461 /* Frees memory used by SUBSCRIPTS. */
1462
1463 static void
1464 free_subscripts (VEC (subscript_p, heap) *subscripts)
1465 {
1466 unsigned i;
1467 subscript_p s;
1468
1469 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1470 {
1471 free_conflict_function (s->conflicting_iterations_in_a);
1472 free_conflict_function (s->conflicting_iterations_in_b);
1473 free (s);
1474 }
1475 VEC_free (subscript_p, heap, subscripts);
1476 }
1477
1478 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1479 description. */
1480
1481 static inline void
1482 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1483 tree chrec)
1484 {
1485 if (dump_file && (dump_flags & TDF_DETAILS))
1486 {
1487 fprintf (dump_file, "(dependence classified: ");
1488 print_generic_expr (dump_file, chrec, 0);
1489 fprintf (dump_file, ")\n");
1490 }
1491
1492 DDR_ARE_DEPENDENT (ddr) = chrec;
1493 free_subscripts (DDR_SUBSCRIPTS (ddr));
1494 DDR_SUBSCRIPTS (ddr) = NULL;
1495 }
1496
1497 /* The dependence relation DDR cannot be represented by a distance
1498 vector. */
1499
1500 static inline void
1501 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1502 {
1503 if (dump_file && (dump_flags & TDF_DETAILS))
1504 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1505
1506 DDR_AFFINE_P (ddr) = false;
1507 }
1508
1509 \f
1510
1511 /* This section contains the classic Banerjee tests. */
1512
1513 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1514 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1515
1516 static inline bool
1517 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1518 {
1519 return (evolution_function_is_constant_p (chrec_a)
1520 && evolution_function_is_constant_p (chrec_b));
1521 }
1522
1523 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1524 variable, i.e., if the SIV (Single Index Variable) test is true. */
1525
1526 static bool
1527 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1528 {
1529 if ((evolution_function_is_constant_p (chrec_a)
1530 && evolution_function_is_univariate_p (chrec_b))
1531 || (evolution_function_is_constant_p (chrec_b)
1532 && evolution_function_is_univariate_p (chrec_a)))
1533 return true;
1534
1535 if (evolution_function_is_univariate_p (chrec_a)
1536 && evolution_function_is_univariate_p (chrec_b))
1537 {
1538 switch (TREE_CODE (chrec_a))
1539 {
1540 case POLYNOMIAL_CHREC:
1541 switch (TREE_CODE (chrec_b))
1542 {
1543 case POLYNOMIAL_CHREC:
1544 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1545 return false;
1546
1547 default:
1548 return true;
1549 }
1550
1551 default:
1552 return true;
1553 }
1554 }
1555
1556 return false;
1557 }
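/* Classification examples (illustration only): the subscript (5, 7) is
   ZIV (both chrecs are constant); (5, {0, +, 1}_1) and
   ({0, +, 1}_1, {2, +, 1}_1) are SIV (at most one loop variable is
   involved); ({0, +, 1}_1, {0, +, 1}_2) involves two different loops
   and is handled as MIV.  */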
1558
1559 /* Creates a conflict function with N dimensions. The affine functions
1560 in each dimension follow. */
1561
1562 static conflict_function *
1563 conflict_fn (unsigned n, ...)
1564 {
1565 unsigned i;
1566 conflict_function *ret = XCNEW (conflict_function);
1567 va_list ap;
1568
1569 gcc_assert (0 < n && n <= MAX_DIM);
1570 va_start(ap, n);
1571
1572 ret->n = n;
1573 for (i = 0; i < n; i++)
1574 ret->fns[i] = va_arg (ap, affine_fn);
1575 va_end(ap);
1576
1577 return ret;
1578 }
1579
1580 /* Returns constant affine function with value CST. */
1581
1582 static affine_fn
1583 affine_fn_cst (tree cst)
1584 {
1585 affine_fn fn = VEC_alloc (tree, heap, 1);
1586 VEC_quick_push (tree, fn, cst);
1587 return fn;
1588 }
1589
1590 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1591
1592 static affine_fn
1593 affine_fn_univar (tree cst, unsigned dim, tree coef)
1594 {
1595 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1596 unsigned i;
1597
1598 gcc_assert (dim > 0);
1599 VEC_quick_push (tree, fn, cst);
1600 for (i = 1; i < dim; i++)
1601 VEC_quick_push (tree, fn, integer_zero_node);
1602 VEC_quick_push (tree, fn, coef);
1603 return fn;
1604 }
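/* For illustration (not part of the original sources): the call
   affine_fn_univar (build_int_cst (integer_type_node, 7), 2,
                     build_int_cst (integer_type_node, 3))
   builds the coefficient vector [7, 0, 3], i.e. the function
   7 + 0 * x_1 + 3 * x_2, which dump_affine_function would print as
   "7 + 0 * x_1 + 3 * x_2".  */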
1605
1606 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1607 *OVERLAPS_B are initialized to the functions that describe the
1608 relation between the elements accessed twice by CHREC_A and
1609 CHREC_B. For k >= 0, the following property is verified:
1610
1611 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1612
1613 static void
1614 analyze_ziv_subscript (tree chrec_a,
1615 tree chrec_b,
1616 conflict_function **overlaps_a,
1617 conflict_function **overlaps_b,
1618 tree *last_conflicts)
1619 {
1620 tree type, difference;
1621 dependence_stats.num_ziv++;
1622
1623 if (dump_file && (dump_flags & TDF_DETAILS))
1624 fprintf (dump_file, "(analyze_ziv_subscript \n");
1625
1626 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1627 chrec_a = chrec_convert (type, chrec_a, NULL);
1628 chrec_b = chrec_convert (type, chrec_b, NULL);
1629 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1630
1631 switch (TREE_CODE (difference))
1632 {
1633 case INTEGER_CST:
1634 if (integer_zerop (difference))
1635 {
1636 /* The difference is equal to zero: the accessed index
1637 overlaps for each iteration in the loop. */
1638 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1639 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1640 *last_conflicts = chrec_dont_know;
1641 dependence_stats.num_ziv_dependent++;
1642 }
1643 else
1644 {
1645 /* The accesses do not overlap. */
1646 *overlaps_a = conflict_fn_no_dependence ();
1647 *overlaps_b = conflict_fn_no_dependence ();
1648 *last_conflicts = integer_zero_node;
1649 dependence_stats.num_ziv_independent++;
1650 }
1651 break;
1652
1653 default:
1654 /* We're not sure whether the indexes overlap. For the moment,
1655 conservatively answer "don't know". */
1656 if (dump_file && (dump_flags & TDF_DETAILS))
1657 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1658
1659 *overlaps_a = conflict_fn_not_known ();
1660 *overlaps_b = conflict_fn_not_known ();
1661 *last_conflicts = chrec_dont_know;
1662 dependence_stats.num_ziv_unimplemented++;
1663 break;
1664 }
1665
1666 if (dump_file && (dump_flags & TDF_DETAILS))
1667 fprintf (dump_file, ")\n");
1668 }
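/* ZIV example (illustration only): for the subscript (A[5], A[5]) the
   difference of the chrecs is 0, so both conflict functions are the
   constant 0 and the accesses conflict on every iteration; for
   (A[5], A[7]) the difference is the nonzero constant -2, so the test
   reports no dependence.  */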
1669
1670 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1671 and only if it fits in the unsigned int type.  If this is not the case, or the
1672 bound on the number of iterations of LOOP could not be derived, returns
1673 chrec_dont_know. */
1674
1675 static tree
1676 max_stmt_executions_tree (struct loop *loop)
1677 {
1678 double_int nit;
1679
1680 if (!max_stmt_executions (loop, true, &nit))
1681 return chrec_dont_know;
1682
1683 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1684 return chrec_dont_know;
1685
1686 return double_int_to_tree (unsigned_type_node, nit);
1687 }
1688
1689 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1690 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1691 *OVERLAPS_B are initialized to the functions that describe the
1692 relation between the elements accessed twice by CHREC_A and
1693 CHREC_B. For k >= 0, the following property is verified:
1694
1695 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1696
1697 static void
1698 analyze_siv_subscript_cst_affine (tree chrec_a,
1699 tree chrec_b,
1700 conflict_function **overlaps_a,
1701 conflict_function **overlaps_b,
1702 tree *last_conflicts)
1703 {
1704 bool value0, value1, value2;
1705 tree type, difference, tmp;
1706
1707 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1708 chrec_a = chrec_convert (type, chrec_a, NULL);
1709 chrec_b = chrec_convert (type, chrec_b, NULL);
1710 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1711
1712 if (!chrec_is_positive (initial_condition (difference), &value0))
1713 {
1714 if (dump_file && (dump_flags & TDF_DETAILS))
1715 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1716
1717 dependence_stats.num_siv_unimplemented++;
1718 *overlaps_a = conflict_fn_not_known ();
1719 *overlaps_b = conflict_fn_not_known ();
1720 *last_conflicts = chrec_dont_know;
1721 return;
1722 }
1723 else
1724 {
1725 if (value0 == false)
1726 {
1727 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1728 {
1729 if (dump_file && (dump_flags & TDF_DETAILS))
1730 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1731
1732 *overlaps_a = conflict_fn_not_known ();
1733 *overlaps_b = conflict_fn_not_known ();
1734 *last_conflicts = chrec_dont_know;
1735 dependence_stats.num_siv_unimplemented++;
1736 return;
1737 }
1738 else
1739 {
1740 if (value1 == true)
1741 {
1742 /* Example:
1743 chrec_a = 12
1744 chrec_b = {10, +, 1}
1745 */
1746
1747 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1748 {
1749 HOST_WIDE_INT numiter;
1750 struct loop *loop = get_chrec_loop (chrec_b);
1751
1752 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1753 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1754 fold_build1 (ABS_EXPR, type, difference),
1755 CHREC_RIGHT (chrec_b));
1756 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1757 *last_conflicts = integer_one_node;
1758
1759
1760 /* Perform weak-zero siv test to see if overlap is
1761 outside the loop bounds. */
1762 numiter = max_stmt_executions_int (loop, true);
1763
1764 if (numiter >= 0
1765 && compare_tree_int (tmp, numiter) > 0)
1766 {
1767 free_conflict_function (*overlaps_a);
1768 free_conflict_function (*overlaps_b);
1769 *overlaps_a = conflict_fn_no_dependence ();
1770 *overlaps_b = conflict_fn_no_dependence ();
1771 *last_conflicts = integer_zero_node;
1772 dependence_stats.num_siv_independent++;
1773 return;
1774 }
1775 dependence_stats.num_siv_dependent++;
1776 return;
1777 }
1778
1779 /* When the step does not divide the difference, there are
1780 no overlaps. */
1781 else
1782 {
1783 *overlaps_a = conflict_fn_no_dependence ();
1784 *overlaps_b = conflict_fn_no_dependence ();
1785 *last_conflicts = integer_zero_node;
1786 dependence_stats.num_siv_independent++;
1787 return;
1788 }
1789 }
1790
1791 else
1792 {
1793 /* Example:
1794 chrec_a = 12
1795 chrec_b = {10, +, -1}
1796
1797 In this case, chrec_a will not overlap with chrec_b. */
1798 *overlaps_a = conflict_fn_no_dependence ();
1799 *overlaps_b = conflict_fn_no_dependence ();
1800 *last_conflicts = integer_zero_node;
1801 dependence_stats.num_siv_independent++;
1802 return;
1803 }
1804 }
1805 }
1806 else
1807 {
1808 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1809 {
1810 if (dump_file && (dump_flags & TDF_DETAILS))
1811 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1812
1813 *overlaps_a = conflict_fn_not_known ();
1814 *overlaps_b = conflict_fn_not_known ();
1815 *last_conflicts = chrec_dont_know;
1816 dependence_stats.num_siv_unimplemented++;
1817 return;
1818 }
1819 else
1820 {
1821 if (value2 == false)
1822 {
1823 /* Example:
1824 chrec_a = 3
1825 chrec_b = {10, +, -1}
1826 */
1827 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1828 {
1829 HOST_WIDE_INT numiter;
1830 struct loop *loop = get_chrec_loop (chrec_b);
1831
1832 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1833 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1834 CHREC_RIGHT (chrec_b));
1835 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1836 *last_conflicts = integer_one_node;
1837
1838 /* Perform weak-zero siv test to see if overlap is
1839 outside the loop bounds. */
1840 numiter = max_stmt_executions_int (loop, true);
1841
1842 if (numiter >= 0
1843 && compare_tree_int (tmp, numiter) > 0)
1844 {
1845 free_conflict_function (*overlaps_a);
1846 free_conflict_function (*overlaps_b);
1847 *overlaps_a = conflict_fn_no_dependence ();
1848 *overlaps_b = conflict_fn_no_dependence ();
1849 *last_conflicts = integer_zero_node;
1850 dependence_stats.num_siv_independent++;
1851 return;
1852 }
1853 dependence_stats.num_siv_dependent++;
1854 return;
1855 }
1856
1857 /* When the step does not divide the difference, there
1858 are no overlaps. */
1859 else
1860 {
1861 *overlaps_a = conflict_fn_no_dependence ();
1862 *overlaps_b = conflict_fn_no_dependence ();
1863 *last_conflicts = integer_zero_node;
1864 dependence_stats.num_siv_independent++;
1865 return;
1866 }
1867 }
1868 else
1869 {
1870 /* Example:
1871 chrec_a = 3
1872 chrec_b = {4, +, 1}
1873
1874 In this case, chrec_a will not overlap with chrec_b. */
1875 *overlaps_a = conflict_fn_no_dependence ();
1876 *overlaps_b = conflict_fn_no_dependence ();
1877 *last_conflicts = integer_zero_node;
1878 dependence_stats.num_siv_independent++;
1879 return;
1880 }
1881 }
1882 }
1883 }
1884 }
1885
1886 /* Recursive helper function for initializing the matrix A.  Returns
1887 the initial value of CHREC. */
1888
1889 static tree
1890 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1891 {
1892 gcc_assert (chrec);
1893
1894 switch (TREE_CODE (chrec))
1895 {
1896 case POLYNOMIAL_CHREC:
1897 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1898
1899 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1900 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1901
1902 case PLUS_EXPR:
1903 case MULT_EXPR:
1904 case MINUS_EXPR:
1905 {
1906 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1907 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1908
1909 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1910 }
1911
1912 case NOP_EXPR:
1913 {
1914 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1915 return chrec_convert (chrec_type (chrec), op, NULL);
1916 }
1917
1918 case BIT_NOT_EXPR:
1919 {
1920 /* Handle ~X as -1 - X. */
1921 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1922 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1923 build_int_cst (TREE_TYPE (chrec), -1), op);
1924 }
1925
1926 case INTEGER_CST:
1927 return chrec;
1928
1929 default:
1930 gcc_unreachable ();
1931 return NULL_TREE;
1932 }
1933 }
1934
1935 #define FLOOR_DIV(x,y) ((x) / (y))
1936
1937 /* Solves the special case of the Diophantine equation:
1938 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1939
1940 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1941 number of iterations that loops X and Y run. The overlaps will be
1942 constructed as evolutions in dimension DIM. */
1943
1944 static void
1945 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1946 affine_fn *overlaps_a,
1947 affine_fn *overlaps_b,
1948 tree *last_conflicts, int dim)
1949 {
1950 if (((step_a > 0 && step_b > 0)
1951 || (step_a < 0 && step_b < 0)))
1952 {
1953 int step_overlaps_a, step_overlaps_b;
1954 int gcd_steps_a_b, last_conflict, tau2;
1955
1956 gcd_steps_a_b = gcd (step_a, step_b);
1957 step_overlaps_a = step_b / gcd_steps_a_b;
1958 step_overlaps_b = step_a / gcd_steps_a_b;
1959
1960 if (niter > 0)
1961 {
1962 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1963 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1964 last_conflict = tau2;
1965 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1966 }
1967 else
1968 *last_conflicts = chrec_dont_know;
1969
1970 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1971 build_int_cst (NULL_TREE,
1972 step_overlaps_a));
1973 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1974 build_int_cst (NULL_TREE,
1975 step_overlaps_b));
1976 }
1977
1978 else
1979 {
1980 *overlaps_a = affine_fn_cst (integer_zero_node);
1981 *overlaps_b = affine_fn_cst (integer_zero_node);
1982 *last_conflicts = integer_zero_node;
1983 }
1984 }
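/* Worked example (illustration only): for
   {0, +, 6}_x (OVERLAPS_A) = {0, +, 4}_y (OVERLAPS_B), i.e. STEP_A = 6
   and STEP_B = 4, the gcd is 2, so OVERLAPS_A = {0, +, 2} and
   OVERLAPS_B = {0, +, 3} in dimension DIM: iterations 0, 2, 4, ... of
   loop x touch the same elements as iterations 0, 3, 6, ... of loop y.
   With NITER = 12 the last conflict is MIN (12/2, 12/3) = 4.  */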
1985
1986 /* Solves the special case of a Diophantine equation where CHREC_A is
1987 an affine bivariate function, and CHREC_B is an affine univariate
1988 function. For example,
1989
1990 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1991
1992 has the following overlapping functions:
1993
1994 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1995 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1996 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1997
1998 FORNOW: This is a specialized implementation for a case occurring in
1999 a common benchmark. Implement the general algorithm. */
2000
2001 static void
2002 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2003 conflict_function **overlaps_a,
2004 conflict_function **overlaps_b,
2005 tree *last_conflicts)
2006 {
2007 bool xz_p, yz_p, xyz_p;
2008 int step_x, step_y, step_z;
2009 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2010 affine_fn overlaps_a_xz, overlaps_b_xz;
2011 affine_fn overlaps_a_yz, overlaps_b_yz;
2012 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2013 affine_fn ova1, ova2, ovb;
2014 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2015
2016 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2017 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2018 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2019
2020 niter_x =
2021 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
2022 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2023 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2024
2025 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2026 {
2027 if (dump_file && (dump_flags & TDF_DETAILS))
2028 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2029
2030 *overlaps_a = conflict_fn_not_known ();
2031 *overlaps_b = conflict_fn_not_known ();
2032 *last_conflicts = chrec_dont_know;
2033 return;
2034 }
2035
2036 niter = MIN (niter_x, niter_z);
2037 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2038 &overlaps_a_xz,
2039 &overlaps_b_xz,
2040 &last_conflicts_xz, 1);
2041 niter = MIN (niter_y, niter_z);
2042 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2043 &overlaps_a_yz,
2044 &overlaps_b_yz,
2045 &last_conflicts_yz, 2);
2046 niter = MIN (niter_x, niter_z);
2047 niter = MIN (niter_y, niter);
2048 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2049 &overlaps_a_xyz,
2050 &overlaps_b_xyz,
2051 &last_conflicts_xyz, 3);
2052
2053 xz_p = !integer_zerop (last_conflicts_xz);
2054 yz_p = !integer_zerop (last_conflicts_yz);
2055 xyz_p = !integer_zerop (last_conflicts_xyz);
2056
2057 if (xz_p || yz_p || xyz_p)
2058 {
2059 ova1 = affine_fn_cst (integer_zero_node);
2060 ova2 = affine_fn_cst (integer_zero_node);
2061 ovb = affine_fn_cst (integer_zero_node);
2062 if (xz_p)
2063 {
2064 affine_fn t0 = ova1;
2065 affine_fn t2 = ovb;
2066
2067 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2068 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2069 affine_fn_free (t0);
2070 affine_fn_free (t2);
2071 *last_conflicts = last_conflicts_xz;
2072 }
2073 if (yz_p)
2074 {
2075 affine_fn t0 = ova2;
2076 affine_fn t2 = ovb;
2077
2078 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2079 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2080 affine_fn_free (t0);
2081 affine_fn_free (t2);
2082 *last_conflicts = last_conflicts_yz;
2083 }
2084 if (xyz_p)
2085 {
2086 affine_fn t0 = ova1;
2087 affine_fn t2 = ova2;
2088 affine_fn t4 = ovb;
2089
2090 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2091 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2092 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2093 affine_fn_free (t0);
2094 affine_fn_free (t2);
2095 affine_fn_free (t4);
2096 *last_conflicts = last_conflicts_xyz;
2097 }
2098 *overlaps_a = conflict_fn (2, ova1, ova2);
2099 *overlaps_b = conflict_fn (1, ovb);
2100 }
2101 else
2102 {
2103 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2104 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2105 *last_conflicts = integer_zero_node;
2106 }
2107
2108 affine_fn_free (overlaps_a_xz);
2109 affine_fn_free (overlaps_b_xz);
2110 affine_fn_free (overlaps_a_yz);
2111 affine_fn_free (overlaps_b_yz);
2112 affine_fn_free (overlaps_a_xyz);
2113 affine_fn_free (overlaps_b_xyz);
2114 }
2115
2116 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2117
2118 static void
2119 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2120 int size)
2121 {
2122 memcpy (vec2, vec1, size * sizeof (*vec1));
2123 }
2124
2125 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2126
2127 static void
2128 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2129 int m, int n)
2130 {
2131 int i;
2132
2133 for (i = 0; i < m; i++)
2134 lambda_vector_copy (mat1[i], mat2[i], n);
2135 }
2136
2137 /* Store the SIZE x SIZE identity matrix in MAT. */
2138
2139 static void
2140 lambda_matrix_id (lambda_matrix mat, int size)
2141 {
2142 int i, j;
2143
2144 for (i = 0; i < size; i++)
2145 for (j = 0; j < size; j++)
2146 mat[i][j] = (i == j) ? 1 : 0;
2147 }
2148
2149 /* Return the index of the first nonzero element of vector VEC1 between
2150 START and N. We must have START <= N. Returns N if VEC1 is the zero vector. */
2151
2152 static int
2153 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2154 {
2155 int j = start;
2156 while (j < n && vec1[j] == 0)
2157 j++;
2158 return j;
2159 }
2160
2161 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2162 R2 = R2 + CONST1 * R1. */
2163
2164 static void
2165 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2166 {
2167 int i;
2168
2169 if (const1 == 0)
2170 return;
2171
2172 for (i = 0; i < n; i++)
2173 mat[r2][i] += const1 * mat[r1][i];
2174 }
2175
2176 /* Swap rows R1 and R2 in matrix MAT. */
2177
2178 static void
2179 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2180 {
2181 lambda_vector row;
2182
2183 row = mat[r1];
2184 mat[r1] = mat[r2];
2185 mat[r2] = row;
2186 }
2187
2188 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2189 and store the result in VEC2. */
2190
2191 static void
2192 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2193 int size, int const1)
2194 {
2195 int i;
2196
2197 if (const1 == 0)
2198 lambda_vector_clear (vec2, size);
2199 else
2200 for (i = 0; i < size; i++)
2201 vec2[i] = const1 * vec1[i];
2202 }
2203
2204 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2205
2206 static void
2207 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2208 int size)
2209 {
2210 lambda_vector_mult_const (vec1, vec2, size, -1);
2211 }
2212
2213 /* Negate row R1 of matrix MAT which has N columns. */
2214
2215 static void
2216 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2217 {
2218 lambda_vector_negate (mat[r1], mat[r1], n);
2219 }
2220
2221 /* Return true if two vectors are equal. */
2222
2223 static bool
2224 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2225 {
2226 int i;
2227 for (i = 0; i < size; i++)
2228 if (vec1[i] != vec2[i])
2229 return false;
2230 return true;
2231 }
2232
2233 /* Given an M x N integer matrix A, this function determines an M x
2234 M unimodular matrix U, and an M x N echelon matrix S such that
2235 "U.A = S". This decomposition is also known as "right Hermite".
2236
2237 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2238 Restructuring Compilers" Utpal Banerjee. */
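/* Hand-worked illustration (the exact U computed by the algorithm may
   differ; any unimodular U with U.A in echelon form is a valid right
   Hermite factor): for the 2 x 1 matrix A = ((6), (-4)), taking
   | U = ((1, 1),
   |      (2, 3))
   gives S = U.A = ((2), (0)), since 1*6 + 1*(-4) = 2 and
   2*6 + 3*(-4) = 0, with det (U) = 1 and S[0][0] = gcd (6, 4).  */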
2239
2240 static void
2241 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2242 lambda_matrix S, lambda_matrix U)
2243 {
2244 int i, j, i0 = 0;
2245
2246 lambda_matrix_copy (A, S, m, n);
2247 lambda_matrix_id (U, m);
2248
2249 for (j = 0; j < n; j++)
2250 {
2251 if (lambda_vector_first_nz (S[j], m, i0) < m)
2252 {
2253 ++i0;
2254 for (i = m - 1; i >= i0; i--)
2255 {
2256 while (S[i][j] != 0)
2257 {
2258 int sigma, factor, a, b;
2259
2260 a = S[i-1][j];
2261 b = S[i][j];
2262 sigma = (a * b < 0) ? -1: 1;
2263 a = abs (a);
2264 b = abs (b);
2265 factor = sigma * (a / b);
2266
2267 lambda_matrix_row_add (S, n, i, i-1, -factor);
2268 lambda_matrix_row_exchange (S, i, i-1);
2269
2270 lambda_matrix_row_add (U, m, i, i-1, -factor);
2271 lambda_matrix_row_exchange (U, i, i-1);
2272 }
2273 }
2274 }
2275 }
2276 }
2277
2278 /* Determines the overlapping elements due to accesses CHREC_A and
2279 CHREC_B, which are affine functions. This function cannot handle
2280 symbolic evolution functions, ie. when initial conditions are
2281 parameters, because it uses lambda matrices of integers. */
2282
2283 static void
2284 analyze_subscript_affine_affine (tree chrec_a,
2285 tree chrec_b,
2286 conflict_function **overlaps_a,
2287 conflict_function **overlaps_b,
2288 tree *last_conflicts)
2289 {
2290 unsigned nb_vars_a, nb_vars_b, dim;
2291 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2292 lambda_matrix A, U, S;
2293 struct obstack scratch_obstack;
2294
2295 if (eq_evolutions_p (chrec_a, chrec_b))
2296 {
2297 /* The accessed index overlaps for each iteration in the
2298 loop. */
2299 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2300 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2301 *last_conflicts = chrec_dont_know;
2302 return;
2303 }
2304 if (dump_file && (dump_flags & TDF_DETAILS))
2305 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2306
2307 /* For determining the initial intersection, we have to solve a
2308 Diophantine equation. This is the most time consuming part.
2309
2310 To answer the question "Is there a dependence?" we have to
2311 prove that there exists a solution to the Diophantine
2312 equation, that the solution is in the iteration domain,
2313 i.e. the solution is positive or zero, and that the solution
2314 happens before the upper bound loop.nb_iterations. Otherwise
2315 there is no dependence. This function outputs a description of
2316 the iterations that hold the intersections. */
2317
2318 nb_vars_a = nb_vars_in_chrec (chrec_a);
2319 nb_vars_b = nb_vars_in_chrec (chrec_b);
2320
2321 gcc_obstack_init (&scratch_obstack);
2322
2323 dim = nb_vars_a + nb_vars_b;
2324 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2325 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2326 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2327
2328 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2329 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2330 gamma = init_b - init_a;
2331
2332 /* Don't do all the hard work of solving the Diophantine equation
2333 when we already know the solution: for example,
2334 | {3, +, 1}_1
2335 | {3, +, 4}_2
2336 | gamma = 3 - 3 = 0.
2337 Then the first overlap occurs during the first iterations:
2338 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2339 */
2340 if (gamma == 0)
2341 {
2342 if (nb_vars_a == 1 && nb_vars_b == 1)
2343 {
2344 HOST_WIDE_INT step_a, step_b;
2345 HOST_WIDE_INT niter, niter_a, niter_b;
2346 affine_fn ova, ovb;
2347
2348 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2349 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2350 niter = MIN (niter_a, niter_b);
2351 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2352 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2353
2354 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2355 &ova, &ovb,
2356 last_conflicts, 1);
2357 *overlaps_a = conflict_fn (1, ova);
2358 *overlaps_b = conflict_fn (1, ovb);
2359 }
2360
2361 else if (nb_vars_a == 2 && nb_vars_b == 1)
2362 compute_overlap_steps_for_affine_1_2
2363 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2364
2365 else if (nb_vars_a == 1 && nb_vars_b == 2)
2366 compute_overlap_steps_for_affine_1_2
2367 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2368
2369 else
2370 {
2371 if (dump_file && (dump_flags & TDF_DETAILS))
2372 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2373 *overlaps_a = conflict_fn_not_known ();
2374 *overlaps_b = conflict_fn_not_known ();
2375 *last_conflicts = chrec_dont_know;
2376 }
2377 goto end_analyze_subs_aa;
2378 }
2379
2380 /* U.A = S */
2381 lambda_matrix_right_hermite (A, dim, 1, S, U);
2382
2383 if (S[0][0] < 0)
2384 {
2385 S[0][0] *= -1;
2386 lambda_matrix_row_negate (U, dim, 0);
2387 }
2388 gcd_alpha_beta = S[0][0];
2389
2390 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2391 but that is quite a strange case. Instead of ICEing, answer
2392 "don't know". */
2393 if (gcd_alpha_beta == 0)
2394 {
2395 *overlaps_a = conflict_fn_not_known ();
2396 *overlaps_b = conflict_fn_not_known ();
2397 *last_conflicts = chrec_dont_know;
2398 goto end_analyze_subs_aa;
2399 }
2400
2401 /* The classic "gcd-test". */
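/* Hand-worked illustration: for chrec_a = {3, +, 8}_1 vs.
   chrec_b = {1, +, 12}_1 we get gamma = 1 - 3 = -2 and
   gcd_alpha_beta = gcd (8, 12) = 4; since 4 does not divide -2 there
   is no integer solution, hence no dependence.  */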
2402 if (!int_divides_p (gcd_alpha_beta, gamma))
2403 {
2404 /* The "gcd-test" has determined that there is no integer
2405 solution, i.e. there is no dependence. */
2406 *overlaps_a = conflict_fn_no_dependence ();
2407 *overlaps_b = conflict_fn_no_dependence ();
2408 *last_conflicts = integer_zero_node;
2409 }
2410
2411 /* Both access functions are univariate. This includes SIV and MIV cases. */
2412 else if (nb_vars_a == 1 && nb_vars_b == 1)
2413 {
2414 /* Both functions should have the same evolution sign. */
2415 if (((A[0][0] > 0 && -A[1][0] > 0)
2416 || (A[0][0] < 0 && -A[1][0] < 0)))
2417 {
2418 /* The solutions are given by:
2419 |
2420 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2421 | [u21 u22] [y0]
2422
2423 For a given integer t. Using the following variables,
2424
2425 | i0 = u11 * gamma / gcd_alpha_beta
2426 | j0 = u12 * gamma / gcd_alpha_beta
2427 | i1 = u21
2428 | j1 = u22
2429
2430 the solutions are:
2431
2432 | x0 = i0 + i1 * t,
2433 | y0 = j0 + j1 * t. */
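/* Hand-worked illustration (the particular U, and thus i0 and j0,
   depend on the decomposition, but the solution set does not): for
   chrec_a = {3, +, 6}_1 vs. chrec_b = {1, +, 4}_1 we have gamma = -2
   and gcd_alpha_beta = 2; with U = ((1, 1), (2, 3)) this gives
   | i0 = -1, j0 = -1, i1 = 2, j1 = 3,
   i.e. x0 = -1 + 2t and y0 = -1 + 3t. The smallest non-negative
   solution is (x, y) = (1, 2), and indeed
   {3, +, 6}_1 (1) = 9 = {1, +, 4}_1 (2).  */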
2434 HOST_WIDE_INT i0, j0, i1, j1;
2435
2436 i0 = U[0][0] * gamma / gcd_alpha_beta;
2437 j0 = U[0][1] * gamma / gcd_alpha_beta;
2438 i1 = U[1][0];
2439 j1 = U[1][1];
2440
2441 if ((i1 == 0 && i0 < 0)
2442 || (j1 == 0 && j0 < 0))
2443 {
2444 /* There is no solution.
2445 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2446 falls in here, but for the moment we don't look at the
2447 upper bound of the iteration domain. */
2448 *overlaps_a = conflict_fn_no_dependence ();
2449 *overlaps_b = conflict_fn_no_dependence ();
2450 *last_conflicts = integer_zero_node;
2451 goto end_analyze_subs_aa;
2452 }
2453
2454 if (i1 > 0 && j1 > 0)
2455 {
2456 HOST_WIDE_INT niter_a = max_stmt_executions_int
2457 (get_chrec_loop (chrec_a), true);
2458 HOST_WIDE_INT niter_b = max_stmt_executions_int
2459 (get_chrec_loop (chrec_b), true);
2460 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2461
2462 /* (X0, Y0) is a solution of the Diophantine equation:
2463 "chrec_a (X0) = chrec_b (Y0)". */
2464 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2465 CEIL (-j0, j1));
2466 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2467 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2468
2469 /* (X1, Y1) is the smallest positive solution of the eq
2470 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2471 first conflict occurs. */
2472 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2473 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2474 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2475
2476 if (niter > 0)
2477 {
2478 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2479 FLOOR_DIV (niter - j0, j1));
2480 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2481
2482 /* If the overlap occurs outside of the bounds of the
2483 loop, there is no dependence. */
2484 if (x1 >= niter || y1 >= niter)
2485 {
2486 *overlaps_a = conflict_fn_no_dependence ();
2487 *overlaps_b = conflict_fn_no_dependence ();
2488 *last_conflicts = integer_zero_node;
2489 goto end_analyze_subs_aa;
2490 }
2491 else
2492 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2493 }
2494 else
2495 *last_conflicts = chrec_dont_know;
2496
2497 *overlaps_a
2498 = conflict_fn (1,
2499 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2500 1,
2501 build_int_cst (NULL_TREE, i1)));
2502 *overlaps_b
2503 = conflict_fn (1,
2504 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2505 1,
2506 build_int_cst (NULL_TREE, j1)));
2507 }
2508 else
2509 {
2510 /* FIXME: For the moment, the upper bound of the
2511 iteration domain for i and j is not checked. */
2512 if (dump_file && (dump_flags & TDF_DETAILS))
2513 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2514 *overlaps_a = conflict_fn_not_known ();
2515 *overlaps_b = conflict_fn_not_known ();
2516 *last_conflicts = chrec_dont_know;
2517 }
2518 }
2519 else
2520 {
2521 if (dump_file && (dump_flags & TDF_DETAILS))
2522 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2523 *overlaps_a = conflict_fn_not_known ();
2524 *overlaps_b = conflict_fn_not_known ();
2525 *last_conflicts = chrec_dont_know;
2526 }
2527 }
2528 else
2529 {
2530 if (dump_file && (dump_flags & TDF_DETAILS))
2531 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2532 *overlaps_a = conflict_fn_not_known ();
2533 *overlaps_b = conflict_fn_not_known ();
2534 *last_conflicts = chrec_dont_know;
2535 }
2536
2537 end_analyze_subs_aa:
2538 obstack_free (&scratch_obstack, NULL);
2539 if (dump_file && (dump_flags & TDF_DETAILS))
2540 {
2541 fprintf (dump_file, " (overlaps_a = ");
2542 dump_conflict_function (dump_file, *overlaps_a);
2543 fprintf (dump_file, ")\n (overlaps_b = ");
2544 dump_conflict_function (dump_file, *overlaps_b);
2545 fprintf (dump_file, ")\n");
2546 fprintf (dump_file, ")\n");
2547 }
2548 }
2549
2550 /* Returns true when analyze_subscript_affine_affine can be used for
2551 determining the dependence relation between chrec_a and chrec_b,
2552 that contain symbols. This function modifies chrec_a and chrec_b
2553 such that the analysis result is the same, and such that they no
2554 longer contain symbols and can safely be passed to the analyzer.
2555
2556 Example: The analysis of the following tuples of evolutions produce
2557 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2558 vs. {0, +, 1}_1
2559
2560 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2561 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2562 */
2563
2564 static bool
2565 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2566 {
2567 tree diff, type, left_a, left_b, right_b;
2568
2569 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2570 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2571 /* FIXME: For the moment not handled. Might be refined later. */
2572 return false;
2573
2574 type = chrec_type (*chrec_a);
2575 left_a = CHREC_LEFT (*chrec_a);
2576 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2577 diff = chrec_fold_minus (type, left_a, left_b);
2578
2579 if (!evolution_function_is_constant_p (diff))
2580 return false;
2581
2582 if (dump_file && (dump_flags & TDF_DETAILS))
2583 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2584
2585 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2586 diff, CHREC_RIGHT (*chrec_a));
2587 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2588 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2589 build_int_cst (type, 0),
2590 right_b);
2591 return true;
2592 }
2593
2594 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2595 *OVERLAPS_B are initialized to the functions that describe the
2596 relation between the elements accessed twice by CHREC_A and
2597 CHREC_B. For k >= 0, the following property is verified:
2598
2599 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2600
2601 static void
2602 analyze_siv_subscript (tree chrec_a,
2603 tree chrec_b,
2604 conflict_function **overlaps_a,
2605 conflict_function **overlaps_b,
2606 tree *last_conflicts,
2607 int loop_nest_num)
2608 {
2609 dependence_stats.num_siv++;
2610
2611 if (dump_file && (dump_flags & TDF_DETAILS))
2612 fprintf (dump_file, "(analyze_siv_subscript \n");
2613
2614 if (evolution_function_is_constant_p (chrec_a)
2615 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2616 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2617 overlaps_a, overlaps_b, last_conflicts);
2618
2619 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2620 && evolution_function_is_constant_p (chrec_b))
2621 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2622 overlaps_b, overlaps_a, last_conflicts);
2623
2624 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2625 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2626 {
2627 if (!chrec_contains_symbols (chrec_a)
2628 && !chrec_contains_symbols (chrec_b))
2629 {
2630 analyze_subscript_affine_affine (chrec_a, chrec_b,
2631 overlaps_a, overlaps_b,
2632 last_conflicts);
2633
2634 if (CF_NOT_KNOWN_P (*overlaps_a)
2635 || CF_NOT_KNOWN_P (*overlaps_b))
2636 dependence_stats.num_siv_unimplemented++;
2637 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2638 || CF_NO_DEPENDENCE_P (*overlaps_b))
2639 dependence_stats.num_siv_independent++;
2640 else
2641 dependence_stats.num_siv_dependent++;
2642 }
2643 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2644 &chrec_b))
2645 {
2646 analyze_subscript_affine_affine (chrec_a, chrec_b,
2647 overlaps_a, overlaps_b,
2648 last_conflicts);
2649
2650 if (CF_NOT_KNOWN_P (*overlaps_a)
2651 || CF_NOT_KNOWN_P (*overlaps_b))
2652 dependence_stats.num_siv_unimplemented++;
2653 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2654 || CF_NO_DEPENDENCE_P (*overlaps_b))
2655 dependence_stats.num_siv_independent++;
2656 else
2657 dependence_stats.num_siv_dependent++;
2658 }
2659 else
2660 goto siv_subscript_dontknow;
2661 }
2662
2663 else
2664 {
2665 siv_subscript_dontknow:;
2666 if (dump_file && (dump_flags & TDF_DETAILS))
2667 fprintf (dump_file, "siv test failed: unimplemented.\n");
2668 *overlaps_a = conflict_fn_not_known ();
2669 *overlaps_b = conflict_fn_not_known ();
2670 *last_conflicts = chrec_dont_know;
2671 dependence_stats.num_siv_unimplemented++;
2672 }
2673
2674 if (dump_file && (dump_flags & TDF_DETAILS))
2675 fprintf (dump_file, ")\n");
2676 }
2677
2678 /* Returns false if we can prove that the greatest common divisor of the steps
2679 of CHREC does not divide CST, true otherwise. */
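/* Hand-worked illustration: for CHREC = {{21, +, 2}_1, +, -2}_2 the
   steps are -2 and 2, so their gcd is 2; with CST = 1 we have
   1 % 2 != 0, so the gcd of the steps cannot divide CST and the
   function returns false (this is the situation exploited by the
   ssa-chrec-33.c example further below).  */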
2680
2681 static bool
2682 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2683 {
2684 HOST_WIDE_INT cd = 0, val;
2685 tree step;
2686
2687 if (!host_integerp (cst, 0))
2688 return true;
2689 val = tree_low_cst (cst, 0);
2690
2691 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2692 {
2693 step = CHREC_RIGHT (chrec);
2694 if (!host_integerp (step, 0))
2695 return true;
2696 cd = gcd (cd, tree_low_cst (step, 0));
2697 chrec = CHREC_LEFT (chrec);
2698 }
2699
2700 return val % cd == 0;
2701 }
2702
2703 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2704 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2705 functions that describe the relation between the elements accessed
2706 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2707 is verified:
2708
2709 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2710
2711 static void
2712 analyze_miv_subscript (tree chrec_a,
2713 tree chrec_b,
2714 conflict_function **overlaps_a,
2715 conflict_function **overlaps_b,
2716 tree *last_conflicts,
2717 struct loop *loop_nest)
2718 {
2719 tree type, difference;
2720
2721 dependence_stats.num_miv++;
2722 if (dump_file && (dump_flags & TDF_DETAILS))
2723 fprintf (dump_file, "(analyze_miv_subscript \n");
2724
2725 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2726 chrec_a = chrec_convert (type, chrec_a, NULL);
2727 chrec_b = chrec_convert (type, chrec_b, NULL);
2728 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2729
2730 if (eq_evolutions_p (chrec_a, chrec_b))
2731 {
2732 /* Access functions are the same: all the elements are accessed
2733 in the same order. */
2734 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2735 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2736 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2737 dependence_stats.num_miv_dependent++;
2738 }
2739
2740 else if (evolution_function_is_constant_p (difference)
2741 /* For the moment, the following is verified:
2742 evolution_function_is_affine_multivariate_p (chrec_a,
2743 loop_nest->num) */
2744 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2745 {
2746 /* testsuite/.../ssa-chrec-33.c
2747 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2748
2749 The difference is 1, and all the evolution steps are multiples
2750 of 2, consequently there are no overlapping elements. */
2751 *overlaps_a = conflict_fn_no_dependence ();
2752 *overlaps_b = conflict_fn_no_dependence ();
2753 *last_conflicts = integer_zero_node;
2754 dependence_stats.num_miv_independent++;
2755 }
2756
2757 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2758 && !chrec_contains_symbols (chrec_a)
2759 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2760 && !chrec_contains_symbols (chrec_b))
2761 {
2762 /* testsuite/.../ssa-chrec-35.c
2763 {0, +, 1}_2 vs. {0, +, 1}_3
2764 the overlapping elements are respectively located at iterations:
2765 {0, +, 1}_x and {0, +, 1}_x,
2766 in other words, we have the equality:
2767 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2768
2769 Other examples:
2770 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2771 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2772
2773 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2774 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2775 */
2776 analyze_subscript_affine_affine (chrec_a, chrec_b,
2777 overlaps_a, overlaps_b, last_conflicts);
2778
2779 if (CF_NOT_KNOWN_P (*overlaps_a)
2780 || CF_NOT_KNOWN_P (*overlaps_b))
2781 dependence_stats.num_miv_unimplemented++;
2782 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2783 || CF_NO_DEPENDENCE_P (*overlaps_b))
2784 dependence_stats.num_miv_independent++;
2785 else
2786 dependence_stats.num_miv_dependent++;
2787 }
2788
2789 else
2790 {
2791 /* When the analysis is too difficult, answer "don't know". */
2792 if (dump_file && (dump_flags & TDF_DETAILS))
2793 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2794
2795 *overlaps_a = conflict_fn_not_known ();
2796 *overlaps_b = conflict_fn_not_known ();
2797 *last_conflicts = chrec_dont_know;
2798 dependence_stats.num_miv_unimplemented++;
2799 }
2800
2801 if (dump_file && (dump_flags & TDF_DETAILS))
2802 fprintf (dump_file, ")\n");
2803 }
2804
2805 /* Determines the iterations for which CHREC_A is equal to CHREC_B,
2806 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2807 OVERLAP_ITERATIONS_B are initialized with two functions that
2808 describe the iterations that contain conflicting elements.
2809
2810 Remark: For an integer k >= 0, the following equality is true:
2811
2812 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2813 */
2814
2815 static void
2816 analyze_overlapping_iterations (tree chrec_a,
2817 tree chrec_b,
2818 conflict_function **overlap_iterations_a,
2819 conflict_function **overlap_iterations_b,
2820 tree *last_conflicts, struct loop *loop_nest)
2821 {
2822 unsigned int lnn = loop_nest->num;
2823
2824 dependence_stats.num_subscript_tests++;
2825
2826 if (dump_file && (dump_flags & TDF_DETAILS))
2827 {
2828 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2829 fprintf (dump_file, " (chrec_a = ");
2830 print_generic_expr (dump_file, chrec_a, 0);
2831 fprintf (dump_file, ")\n (chrec_b = ");
2832 print_generic_expr (dump_file, chrec_b, 0);
2833 fprintf (dump_file, ")\n");
2834 }
2835
2836 if (chrec_a == NULL_TREE
2837 || chrec_b == NULL_TREE
2838 || chrec_contains_undetermined (chrec_a)
2839 || chrec_contains_undetermined (chrec_b))
2840 {
2841 dependence_stats.num_subscript_undetermined++;
2842
2843 *overlap_iterations_a = conflict_fn_not_known ();
2844 *overlap_iterations_b = conflict_fn_not_known ();
2845 }
2846
2847 /* If they are the same chrec, and are affine, they overlap
2848 on every iteration. */
2849 else if (eq_evolutions_p (chrec_a, chrec_b)
2850 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2851 || operand_equal_p (chrec_a, chrec_b, 0)))
2852 {
2853 dependence_stats.num_same_subscript_function++;
2854 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2855 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2856 *last_conflicts = chrec_dont_know;
2857 }
2858
2859 /* If they aren't the same, and aren't affine, we can't do anything
2860 yet. */
2861 else if ((chrec_contains_symbols (chrec_a)
2862 || chrec_contains_symbols (chrec_b))
2863 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2864 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2865 {
2866 dependence_stats.num_subscript_undetermined++;
2867 *overlap_iterations_a = conflict_fn_not_known ();
2868 *overlap_iterations_b = conflict_fn_not_known ();
2869 }
2870
2871 else if (ziv_subscript_p (chrec_a, chrec_b))
2872 analyze_ziv_subscript (chrec_a, chrec_b,
2873 overlap_iterations_a, overlap_iterations_b,
2874 last_conflicts);
2875
2876 else if (siv_subscript_p (chrec_a, chrec_b))
2877 analyze_siv_subscript (chrec_a, chrec_b,
2878 overlap_iterations_a, overlap_iterations_b,
2879 last_conflicts, lnn);
2880
2881 else
2882 analyze_miv_subscript (chrec_a, chrec_b,
2883 overlap_iterations_a, overlap_iterations_b,
2884 last_conflicts, loop_nest);
2885
2886 if (dump_file && (dump_flags & TDF_DETAILS))
2887 {
2888 fprintf (dump_file, " (overlap_iterations_a = ");
2889 dump_conflict_function (dump_file, *overlap_iterations_a);
2890 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2891 dump_conflict_function (dump_file, *overlap_iterations_b);
2892 fprintf (dump_file, ")\n");
2893 fprintf (dump_file, ")\n");
2894 }
2895 }
2896
2897 /* Helper function for uniquely inserting distance vectors. */
2898
2899 static void
2900 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2901 {
2902 unsigned i;
2903 lambda_vector v;
2904
2905 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2906 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2907 return;
2908
2909 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2910 }
2911
2912 /* Helper function for uniquely inserting direction vectors. */
2913
2914 static void
2915 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2916 {
2917 unsigned i;
2918 lambda_vector v;
2919
2920 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2921 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2922 return;
2923
2924 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2925 }
2926
2927 /* Add a distance of 1 on all the loops outer than the one at INDEX. If we
2928 haven't yet determined a distance for this outer loop, push a new
2929 distance vector composed of the previous distance, and a distance
2930 of 1 for this outer loop. Example:
2931
2932 | loop_1
2933 | loop_2
2934 | A[10]
2935 | endloop_2
2936 | endloop_1
2937
2938 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2939 save (0, 1), then we have to save (1, 0). */
2940
2941 static void
2942 add_outer_distances (struct data_dependence_relation *ddr,
2943 lambda_vector dist_v, int index)
2944 {
2945 /* For each outer loop where init_v is not set, the accesses are
2946 in dependence of distance 1 in the loop. */
2947 while (--index >= 0)
2948 {
2949 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2950 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2951 save_v[index] = 1;
2952 save_dist_v (ddr, save_v);
2953 }
2954 }
2955
2956 /* Return false when we fail to represent the data dependence as a
2957 distance vector. INIT_B is set to true when a component has been
2958 added to the distance vector DIST_V. INDEX_CARRY is then set to
2959 the index in DIST_V that carries the dependence. */
2960
2961 static bool
2962 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2963 struct data_reference *ddr_a,
2964 struct data_reference *ddr_b,
2965 lambda_vector dist_v, bool *init_b,
2966 int *index_carry)
2967 {
2968 unsigned i;
2969 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2970
2971 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2972 {
2973 tree access_fn_a, access_fn_b;
2974 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2975
2976 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2977 {
2978 non_affine_dependence_relation (ddr);
2979 return false;
2980 }
2981
2982 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2983 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2984
2985 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2986 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2987 {
2988 int dist, index;
2989 int var_a = CHREC_VARIABLE (access_fn_a);
2990 int var_b = CHREC_VARIABLE (access_fn_b);
2991
2992 if (var_a != var_b
2993 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2994 {
2995 non_affine_dependence_relation (ddr);
2996 return false;
2997 }
2998
2999 dist = int_cst_value (SUB_DISTANCE (subscript));
3000 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3001 *index_carry = MIN (index, *index_carry);
3002
3003 /* This is the subscript coupling test. If we have already
3004 recorded a distance for this loop (a distance coming from
3005 another subscript), it should be the same. For example,
3006 in the following code, there is no dependence:
3007
3008 | loop i = 0, N, 1
3009 | T[i+1][i] = ...
3010 | ... = T[i][i]
3011 | endloop
3012 */
3013 if (init_v[index] != 0 && dist_v[index] != dist)
3014 {
3015 finalize_ddr_dependent (ddr, chrec_known);
3016 return false;
3017 }
3018
3019 dist_v[index] = dist;
3020 init_v[index] = 1;
3021 *init_b = true;
3022 }
3023 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3024 {
3025 /* This can be for example an affine vs. constant dependence
3026 (T[i] vs. T[3]) that is not an affine dependence and is
3027 not representable as a distance vector. */
3028 non_affine_dependence_relation (ddr);
3029 return false;
3030 }
3031 }
3032
3033 return true;
3034 }
3035
3036 /* Return true when the DDR contains only constant access functions. */
3037
3038 static bool
3039 constant_access_functions (const struct data_dependence_relation *ddr)
3040 {
3041 unsigned i;
3042
3043 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3044 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3045 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3046 return false;
3047
3048 return true;
3049 }
3050
3051 /* Helper function for the case where DDR_A and DDR_B are the same
3052 multivariate access function with a constant step. For an example
3053 see pr34635-1.c. */
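/* Rationale sketch (editorial, not from the original comment): for an
   access such as {{0, +, 2}_1, +, 3}_2, i.e. A[2*i_1 + 3*i_2], the
   same element is touched again whenever 2*d_1 + 3*d_2 = 0; dividing
   by gcd (2, 3) = 1 and picking the lexicographically positive
   generator yields (d_1, d_2) = (3, -2), the vector saved below.  */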
3054
3055 static void
3056 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3057 {
3058 int x_1, x_2;
3059 tree c_1 = CHREC_LEFT (c_2);
3060 tree c_0 = CHREC_LEFT (c_1);
3061 lambda_vector dist_v;
3062 int v1, v2, cd;
3063
3064 /* Polynomials with more than 2 variables are not handled yet. When
3065 the evolution steps are parameters, it is not possible to
3066 represent the dependence using classical distance vectors. */
3067 if (TREE_CODE (c_0) != INTEGER_CST
3068 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3069 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3070 {
3071 DDR_AFFINE_P (ddr) = false;
3072 return;
3073 }
3074
3075 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3076 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3077
3078 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3079 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3080 v1 = int_cst_value (CHREC_RIGHT (c_1));
3081 v2 = int_cst_value (CHREC_RIGHT (c_2));
3082 cd = gcd (v1, v2);
3083 v1 /= cd;
3084 v2 /= cd;
3085
3086 if (v2 < 0)
3087 {
3088 v2 = -v2;
3089 v1 = -v1;
3090 }
3091
3092 dist_v[x_1] = v2;
3093 dist_v[x_2] = -v1;
3094 save_dist_v (ddr, dist_v);
3095
3096 add_outer_distances (ddr, dist_v, x_1);
3097 }
3098
3099 /* Helper function for the case where DDR_A and DDR_B are the same
3100 access functions. */
3101
3102 static void
3103 add_other_self_distances (struct data_dependence_relation *ddr)
3104 {
3105 lambda_vector dist_v;
3106 unsigned i;
3107 int index_carry = DDR_NB_LOOPS (ddr);
3108
3109 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3110 {
3111 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3112
3113 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3114 {
3115 if (!evolution_function_is_univariate_p (access_fun))
3116 {
3117 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3118 {
3119 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3120 return;
3121 }
3122
3123 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3124
3125 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3126 add_multivariate_self_dist (ddr, access_fun);
3127 else
3128 /* The evolution step is not constant: it varies in
3129 the outer loop, so this cannot be represented by a
3130 distance vector. For example in pr34635.c the
3131 evolution is {0, +, {0, +, 4}_1}_2. */
3132 DDR_AFFINE_P (ddr) = false;
3133
3134 return;
3135 }
3136
3137 index_carry = MIN (index_carry,
3138 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3139 DDR_LOOP_NEST (ddr)));
3140 }
3141 }
3142
3143 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3144 add_outer_distances (ddr, dist_v, index_carry);
3145 }
3146
3147 static void
3148 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3149 {
3150 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3151
3152 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3153 save_dist_v (ddr, dist_v);
3154 }
3155
3156 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3157 is the case for example when access functions are the same and
3158 equal to a constant, as in:
3159
3160 | loop_1
3161 | A[3] = ...
3162 | ... = A[3]
3163 | endloop_1
3164
3165 in which case the distance vectors are (0) and (1). */
3166
3167 static void
3168 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3169 {
3170 unsigned i, j;
3171
3172 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3173 {
3174 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3175 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3176 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3177
3178 for (j = 0; j < ca->n; j++)
3179 if (affine_function_zero_p (ca->fns[j]))
3180 {
3181 insert_innermost_unit_dist_vector (ddr);
3182 return;
3183 }
3184
3185 for (j = 0; j < cb->n; j++)
3186 if (affine_function_zero_p (cb->fns[j]))
3187 {
3188 insert_innermost_unit_dist_vector (ddr);
3189 return;
3190 }
3191 }
3192 }
3193
3194 /* Compute the classic per loop distance vector. DDR is the data
3195 dependence relation to build a vector from. Return false when we fail
3196 to represent the data dependence as a distance vector. */
3197
3198 static bool
3199 build_classic_dist_vector (struct data_dependence_relation *ddr,
3200 struct loop *loop_nest)
3201 {
3202 bool init_b = false;
3203 int index_carry = DDR_NB_LOOPS (ddr);
3204 lambda_vector dist_v;
3205
3206 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3207 return false;
3208
3209 if (same_access_functions (ddr))
3210 {
3211 /* Save the 0 vector. */
3212 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3213 save_dist_v (ddr, dist_v);
3214
3215 if (constant_access_functions (ddr))
3216 add_distance_for_zero_overlaps (ddr);
3217
3218 if (DDR_NB_LOOPS (ddr) > 1)
3219 add_other_self_distances (ddr);
3220
3221 return true;
3222 }
3223
3224 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3225 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3226 dist_v, &init_b, &index_carry))
3227 return false;
3228
3229 /* Save the distance vector if we initialized one. */
3230 if (init_b)
3231 {
3232 /* Verify a basic constraint: classic distance vectors should
3233 always be lexicographically positive.
3234
3235 Data references are collected in the order of execution of
3236 the program, thus for the following loop
3237
3238 | for (i = 1; i < 100; i++)
3239 | for (j = 1; j < 100; j++)
3240 | {
3241 | t = T[j+1][i-1]; // A
3242 | T[j][i] = t + 2; // B
3243 | }
3244
3245 references are collected following the direction of the wind:
3246 A then B. The data dependence tests are performed also
3247 following this order, such that we're looking at the distance
3248 separating the elements accessed by A from the elements later
3249 accessed by B. But in this example, the distance returned by
3250 test_dep (A, B) is lexicographically negative (-1, 1), that
3251 means that the access A occurs later than B with respect to
3252 the outer loop, ie. we're actually looking upwind. In this
3253 case we solve test_dep (B, A) looking downwind to the
3254 lexicographically positive solution, that returns the
3255 distance vector (1, -1). */
3256 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3257 {
3258 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3259 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3260 loop_nest))
3261 return false;
3262 compute_subscript_distance (ddr);
3263 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3264 save_v, &init_b, &index_carry))
3265 return false;
3266 save_dist_v (ddr, save_v);
3267 DDR_REVERSED_P (ddr) = true;
3268
3269 /* In this case there is a dependence forward for all the
3270 outer loops:
3271
3272 | for (k = 1; k < 100; k++)
3273 | for (i = 1; i < 100; i++)
3274 | for (j = 1; j < 100; j++)
3275 | {
3276 | t = T[j+1][i-1]; // A
3277 | T[j][i] = t + 2; // B
3278 | }
3279
3280 the vectors are:
3281 (0, 1, -1)
3282 (1, 1, -1)
3283 (1, -1, 1)
3284 */
3285 if (DDR_NB_LOOPS (ddr) > 1)
3286 {
3287 add_outer_distances (ddr, save_v, index_carry);
3288 add_outer_distances (ddr, dist_v, index_carry);
3289 }
3290 }
3291 else
3292 {
3293 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3294 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3295
3296 if (DDR_NB_LOOPS (ddr) > 1)
3297 {
3298 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3299
3300 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3301 DDR_A (ddr), loop_nest))
3302 return false;
3303 compute_subscript_distance (ddr);
3304 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3305 opposite_v, &init_b,
3306 &index_carry))
3307 return false;
3308
3309 save_dist_v (ddr, save_v);
3310 add_outer_distances (ddr, dist_v, index_carry);
3311 add_outer_distances (ddr, opposite_v, index_carry);
3312 }
3313 else
3314 save_dist_v (ddr, save_v);
3315 }
3316 }
3317 else
3318 {
3319 /* There is a distance of 1 on all the outer loops: Example:
3320 there is a dependence of distance 1 on loop_1 for the array A.
3321
3322 | loop_1
3323 | A[5] = ...
3324 | endloop
3325 */
3326 add_outer_distances (ddr, dist_v,
3327 lambda_vector_first_nz (dist_v,
3328 DDR_NB_LOOPS (ddr), 0));
3329 }
3330
3331 if (dump_file && (dump_flags & TDF_DETAILS))
3332 {
3333 unsigned i;
3334
3335 fprintf (dump_file, "(build_classic_dist_vector\n");
3336 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3337 {
3338 fprintf (dump_file, " dist_vector = (");
3339 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3340 DDR_NB_LOOPS (ddr));
3341 fprintf (dump_file, " )\n");
3342 }
3343 fprintf (dump_file, ")\n");
3344 }
3345
3346 return true;
3347 }
3348
3349 /* Return the direction for a given distance.
3350 FIXME: Computing dir this way is suboptimal, since dir can catch
3351 cases that dist is unable to represent. */
3352
3353 static inline enum data_dependence_direction
3354 dir_from_dist (int dist)
3355 {
3356 if (dist > 0)
3357 return dir_positive;
3358 else if (dist < 0)
3359 return dir_negative;
3360 else
3361 return dir_equal;
3362 }
3363
3364 /* Compute the classic per loop direction vector. DDR is the data
3365 dependence relation to build a vector from. */
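/* For instance (illustration): the distance vectors (0, 1) and (2, -1)
   yield the direction vectors (=, +) and (+, -) respectively.  */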
3366
3367 static void
3368 build_classic_dir_vector (struct data_dependence_relation *ddr)
3369 {
3370 unsigned i, j;
3371 lambda_vector dist_v;
3372
3373 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3374 {
3375 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3376
3377 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3378 dir_v[j] = dir_from_dist (dist_v[j]);
3379
3380 save_dir_v (ddr, dir_v);
3381 }
3382 }
3383
3384 /* Helper function. Returns true when there is a dependence between
3385 data references DRA and DRB. */
3386
3387 static bool
3388 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3389 struct data_reference *dra,
3390 struct data_reference *drb,
3391 struct loop *loop_nest)
3392 {
3393 unsigned int i;
3394 tree last_conflicts;
3395 struct subscript *subscript;
3396
3397 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3398 i++)
3399 {
3400 conflict_function *overlaps_a, *overlaps_b;
3401
3402 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3403 DR_ACCESS_FN (drb, i),
3404 &overlaps_a, &overlaps_b,
3405 &last_conflicts, loop_nest);
3406
3407 if (CF_NOT_KNOWN_P (overlaps_a)
3408 || CF_NOT_KNOWN_P (overlaps_b))
3409 {
3410 finalize_ddr_dependent (ddr, chrec_dont_know);
3411 dependence_stats.num_dependence_undetermined++;
3412 free_conflict_function (overlaps_a);
3413 free_conflict_function (overlaps_b);
3414 return false;
3415 }
3416
3417 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3418 || CF_NO_DEPENDENCE_P (overlaps_b))
3419 {
3420 finalize_ddr_dependent (ddr, chrec_known);
3421 dependence_stats.num_dependence_independent++;
3422 free_conflict_function (overlaps_a);
3423 free_conflict_function (overlaps_b);
3424 return false;
3425 }
3426
3427 else
3428 {
3429 if (SUB_CONFLICTS_IN_A (subscript))
3430 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3431 if (SUB_CONFLICTS_IN_B (subscript))
3432 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3433
3434 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3435 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3436 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3437 }
3438 }
3439
3440 return true;
3441 }
3442
3443 /* Computes the conflicting iterations in LOOP_NEST, and initializes DDR. */
3444
3445 static void
3446 subscript_dependence_tester (struct data_dependence_relation *ddr,
3447 struct loop *loop_nest)
3448 {
3449
3450 if (dump_file && (dump_flags & TDF_DETAILS))
3451 fprintf (dump_file, "(subscript_dependence_tester \n");
3452
3453 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3454 dependence_stats.num_dependence_dependent++;
3455
3456 compute_subscript_distance (ddr);
3457 if (build_classic_dist_vector (ddr, loop_nest))
3458 build_classic_dir_vector (ddr);
3459
3460 if (dump_file && (dump_flags & TDF_DETAILS))
3461 fprintf (dump_file, ")\n");
3462 }
3463
3464 /* Returns true when all the access functions of A are affine or
3465 constant with respect to LOOP_NEST. */
3466
3467 static bool
3468 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3469 const struct loop *loop_nest)
3470 {
3471 unsigned int i;
3472 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3473 tree t;
3474
3475 FOR_EACH_VEC_ELT (tree, fns, i, t)
3476 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3477 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3478 return false;
3479
3480 return true;
3481 }
3482
3483 /* Initializes an equation for an OMEGA problem using the information
3484 contained in the ACCESS_FUN. Returns true when the operation
3485 succeeded.
3486
3487 PB is the omega constraint system.
3488 EQ is the number of the equation to be initialized.
3489 OFFSET is used for shifting the variable names in the constraints:
3490 a constraint is composed of 2 * the number of variables surrounding
3491 the dependence accesses. OFFSET is set either to 0, for the first n
3492 variables, or to n, for the remaining n variables.
3493 ACCESS_FUN is expected to be an affine chrec. */
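/* Hand-worked sketch of the resulting coefficients (illustrative
   values, following the layout documented in init_omega_for_ddr_1):
   with DDR_NB_LOOPS = 1 and ACCESS_FUN = {5, +, 3}_1 referring to the
   loop at index 0, a call with OFFSET = 1 adds
   | coef[0] += 5 and coef[2] = 3,
   i.e. the constant and the "loop_x" slot, while a call with
   OFFSET = 0 additionally mirrors the step into the protected
   distance variable: coef[1] = 3, coef[2] += 3 and coef[0] += 5.  */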
3494
3495 static bool
3496 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3497 unsigned int offset, tree access_fun,
3498 struct data_dependence_relation *ddr)
3499 {
3500 switch (TREE_CODE (access_fun))
3501 {
3502 case POLYNOMIAL_CHREC:
3503 {
3504 tree left = CHREC_LEFT (access_fun);
3505 tree right = CHREC_RIGHT (access_fun);
3506 int var = CHREC_VARIABLE (access_fun);
3507 unsigned var_idx;
3508
3509 if (TREE_CODE (right) != INTEGER_CST)
3510 return false;
3511
3512 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3513 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3514
3515 /* Compute the innermost loop index. */
3516 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3517
3518 if (offset == 0)
3519 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3520 += int_cst_value (right);
3521
3522 switch (TREE_CODE (left))
3523 {
3524 case POLYNOMIAL_CHREC:
3525 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3526
3527 case INTEGER_CST:
3528 pb->eqs[eq].coef[0] += int_cst_value (left);
3529 return true;
3530
3531 default:
3532 return false;
3533 }
3534 }
3535
3536 case INTEGER_CST:
3537 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3538 return true;
3539
3540 default:
3541 return false;
3542 }
3543 }
3544
3545 /* As explained in the comments preceding init_omega_for_ddr, we have
3546 to set up a system for each loop level, setting the outer loops'
3547 variation to zero, and the current loop's variation to positive or zero.
3548 Save each lexicographically positive distance vector. */
3549
3550 static void
3551 omega_extract_distance_vectors (omega_pb pb,
3552 struct data_dependence_relation *ddr)
3553 {
3554 int eq, geq;
3555 unsigned i, j;
3556 struct loop *loopi, *loopj;
3557 enum omega_result res;
3558
3559 /* Set a new problem for each loop in the nest. The basis is the
3560 problem that we have initialized until now. On top of this we
3561 add new constraints. */
3562 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3563 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3564 {
3565 int dist = 0;
3566 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3567 DDR_NB_LOOPS (ddr));
3568
3569 omega_copy_problem (copy, pb);
3570
3571 /* For all the outer loops "loop_j", add "dj = 0". */
3572 for (j = 0;
3573 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3574 {
3575 eq = omega_add_zero_eq (copy, omega_black);
3576 copy->eqs[eq].coef[j + 1] = 1;
3577 }
3578
3579 /* For "loop_i", add "0 <= di". */
3580 geq = omega_add_zero_geq (copy, omega_black);
3581 copy->geqs[geq].coef[i + 1] = 1;
3582
3583 /* Reduce the constraint system, and test that the current
3584 problem is feasible. */
3585 res = omega_simplify_problem (copy);
3586 if (res == omega_false
3587 || res == omega_unknown
3588 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3589 goto next_problem;
3590
3591 for (eq = 0; eq < copy->num_subs; eq++)
3592 if (copy->subs[eq].key == (int) i + 1)
3593 {
3594 dist = copy->subs[eq].coef[0];
3595 goto found_dist;
3596 }
3597
3598 if (dist == 0)
3599 {
3600 /* Reinitialize problem... */
3601 omega_copy_problem (copy, pb);
3602 for (j = 0;
3603 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3604 {
3605 eq = omega_add_zero_eq (copy, omega_black);
3606 copy->eqs[eq].coef[j + 1] = 1;
3607 }
3608
3609 /* ..., but this time "di = 1". */
3610 eq = omega_add_zero_eq (copy, omega_black);
3611 copy->eqs[eq].coef[i + 1] = 1;
3612 copy->eqs[eq].coef[0] = -1;
3613
3614 res = omega_simplify_problem (copy);
3615 if (res == omega_false
3616 || res == omega_unknown
3617 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3618 goto next_problem;
3619
3620 for (eq = 0; eq < copy->num_subs; eq++)
3621 if (copy->subs[eq].key == (int) i + 1)
3622 {
3623 dist = copy->subs[eq].coef[0];
3624 goto found_dist;
3625 }
3626 }
3627
3628 found_dist:;
3629 /* Save the lexicographically positive distance vector. */
3630 if (dist >= 0)
3631 {
3632 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3633 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3634
3635 dist_v[i] = dist;
3636
3637 for (eq = 0; eq < copy->num_subs; eq++)
3638 if (copy->subs[eq].key > 0)
3639 {
3640 dist = copy->subs[eq].coef[0];
3641 dist_v[copy->subs[eq].key - 1] = dist;
3642 }
3643
3644 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3645 dir_v[j] = dir_from_dist (dist_v[j]);
3646
3647 save_dist_v (ddr, dist_v);
3648 save_dir_v (ddr, dir_v);
3649 }
3650
3651 next_problem:;
3652 omega_free_problem (copy);
3653 }
3654 }
3655
3656 /* This is called for each subscript of a tuple of data references:
3657 insert an equality for representing the conflicts. */
3658
3659 static bool
3660 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3661 struct data_dependence_relation *ddr,
3662 omega_pb pb, bool *maybe_dependent)
3663 {
3664 int eq;
3665 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3666 TREE_TYPE (access_fun_b));
3667 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3668 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3669 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3670 tree minus_one;
3671
3672 /* When fun_a - fun_b is not constant, the dependence is not
3673 captured by the classic distance vector representation. */
3674 if (TREE_CODE (difference) != INTEGER_CST)
3675 return false;
3676
3677 /* ZIV test. */
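/* E.g. (illustration): for A[3] vs. A[5] both access functions are
   invariant, the difference is -2 != 0, and the two references can
   never touch the same element, so independence is reported.  */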
3678 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3679 {
3680 /* There is no dependence. */
3681 *maybe_dependent = false;
3682 return true;
3683 }
3684
3685 minus_one = build_int_cst (type, -1);
3686 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3687
3688 eq = omega_add_zero_eq (pb, omega_black);
3689 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3690 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3691 /* There is probably a dependence, but the system of
3692 constraints cannot be built: answer "don't know". */
3693 return false;
3694
3695 /* GCD test. */
3696 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3697 && !int_divides_p (lambda_vector_gcd
3698 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3699 2 * DDR_NB_LOOPS (ddr)),
3700 pb->eqs[eq].coef[0]))
3701 {
3702 /* There is no dependence. */
3703 *maybe_dependent = false;
3704 return true;
3705 }
3706
3707 return true;
3708 }
3709
3710 /* Helper function, same as init_omega_for_ddr but specialized for
3711 data references A and B. */
3712
3713 static bool
3714 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3715 struct data_dependence_relation *ddr,
3716 omega_pb pb, bool *maybe_dependent)
3717 {
3718 unsigned i;
3719 int ineq;
3720 struct loop *loopi;
3721 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3722
3723 /* Insert an equality per subscript. */
3724 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3725 {
3726 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3727 ddr, pb, maybe_dependent))
3728 return false;
3729 else if (*maybe_dependent == false)
3730 {
3731 /* There is no dependence. */
3732 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3733 return true;
3734 }
3735 }
3736
3737 /* Insert inequalities: constraints corresponding to the iteration
3738 domain, i.e. the loops surrounding the references "loop_x" and
3739 the distance variables "dx". The layout of the OMEGA
3740 representation is as follows:
3741 - coef[0] is the constant
3742 - coef[1..nb_loops] are the protected variables that will not be
3743 removed by the solver: the "dx"
3744 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3745 */
3746 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3747 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3748 {
3749 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3750
3751 /* 0 <= loop_x */
3752 ineq = omega_add_zero_geq (pb, omega_black);
3753 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3754
3755 /* 0 <= loop_x + dx */
3756 ineq = omega_add_zero_geq (pb, omega_black);
3757 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3758 pb->geqs[ineq].coef[i + 1] = 1;
3759
3760 if (nbi != -1)
3761 {
3762 /* loop_x <= nb_iters */
3763 ineq = omega_add_zero_geq (pb, omega_black);
3764 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3765 pb->geqs[ineq].coef[0] = nbi;
3766
3767 /* loop_x + dx <= nb_iters */
3768 ineq = omega_add_zero_geq (pb, omega_black);
3769 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3770 pb->geqs[ineq].coef[i + 1] = -1;
3771 pb->geqs[ineq].coef[0] = nbi;
3772
3773 /* A step "dx" bigger than nb_iters is not feasible, so
3774 add "0 <= nb_iters + dx", */
3775 ineq = omega_add_zero_geq (pb, omega_black);
3776 pb->geqs[ineq].coef[i + 1] = 1;
3777 pb->geqs[ineq].coef[0] = nbi;
3778 /* and "dx <= nb_iters". */
3779 ineq = omega_add_zero_geq (pb, omega_black);
3780 pb->geqs[ineq].coef[i + 1] = -1;
3781 pb->geqs[ineq].coef[0] = nbi;
3782 }
3783 }
3784
3785 omega_extract_distance_vectors (pb, ddr);
3786
3787 return true;
3788 }
3789
3790 /* Sets up the Omega dependence problem for the data dependence
3791 relation DDR. Returns false when the constraint system cannot be
3792 built, ie. when the test answers "don't know". Returns true
3793 otherwise, and when independence has been proved (using one of the
3794 trivial dependence tests), sets MAYBE_DEPENDENT to false; otherwise
3795 sets MAYBE_DEPENDENT to true.
3796
3797 Example: for setting up the dependence system corresponding to the
3798 conflicting accesses
3799
3800 | loop_i
3801 | loop_j
3802 | A[i, i+1] = ...
3803 | ... A[2*j, 2*(i + j)]
3804 | endloop_j
3805 | endloop_i
3806
3807 the following constraints come from the iteration domain:
3808
3809 0 <= i <= Ni
3810 0 <= i + di <= Ni
3811 0 <= j <= Nj
3812 0 <= j + dj <= Nj
3813
3814 where di, dj are the distance variables. The constraints
3815 representing the conflicting elements are:
3816
3817 i = 2 * (j + dj)
3818 i + 1 = 2 * (i + di + j + dj)
3819
3820 For asking that the resulting distance vector (di, dj) be
3821 lexicographically positive, we insert the constraint "di >= 0". If
3822 "di = 0" in the solution, we fix that component to zero, and we
3823 look at the inner loops: we set a new problem where all the outer
3824 loop distances are zero, and fix this inner component to be
3825 positive. When one of the components is positive, we save that
3826 distance, and set a new problem where the distance on this loop is
3827 zero, searching for other distances in the inner loops. Here is
3828 the classic example that illustrates why we have to set up a new
3829 problem for each inner loop:
3830
3831 | loop_1
3832 | loop_2
3833 | A[10]
3834 | endloop_2
3835 | endloop_1
3836
3837 we have to save two distances (1, 0) and (0, 1).
3838
3839 Given two array references, refA and refB, we have to set up the
3840 dependence problem twice, refA vs. refB and refB vs. refA, and we
3841 cannot do a single test, as refB might occur before refA in the
3842 inner loops, and the converse when considering the outer loops.  Example:
3843
3844 | loop_0
3845 | loop_1
3846 | loop_2
3847 | T[{1,+,1}_2][{1,+,1}_1] // refA
3848 | T[{2,+,1}_2][{0,+,1}_1] // refB
3849 | endloop_2
3850 | endloop_1
3851 | endloop_0
3852
3853 refB touches the elements in T before refA, and thus for the same
3854 loop_0 iteration refB precedes refA, i.e. the distance vector is (0, 1, -1),
3855 but for successive loop_0 iterations, we have (1, -1, 1).
3856
3857 The Omega solver expects the distance variables ("di" in the
3858 previous example) to come first in the constraint system (as
3859 variables to be protected, or "safe" variables), so the constraint
3860 system is built using the following layout:
3861
3862 "cst | distance vars | index vars".
3863 */
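/* As a rough illustration of this layout, the first equality of the
   example above, "i = 2 * (j + dj)", rewritten as
   "0 = 0 - 2*dj + i - 2*j", would be stored (up to the exact sign
   conventions used by the solver) as the coefficient row:

   | cst | di  dj | i   j
   |  0  |  0  -2 | 1  -2
   */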
3864
3865 static bool
3866 init_omega_for_ddr (struct data_dependence_relation *ddr,
3867 bool *maybe_dependent)
3868 {
3869 omega_pb pb;
3870 bool res = false;
3871
3872 *maybe_dependent = true;
3873
3874 if (same_access_functions (ddr))
3875 {
3876 unsigned j;
3877 lambda_vector dir_v;
3878
3879 /* Save the 0 vector. */
3880 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3881 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3882 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3883 dir_v[j] = dir_equal;
3884 save_dir_v (ddr, dir_v);
3885
3886 /* Save the dependences carried by outer loops. */
3887 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3888 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3889 maybe_dependent);
3890 omega_free_problem (pb);
3891 return res;
3892 }
3893
3894 /* Omega expects the protected variables (those that have to be kept
3895 after elimination) to appear first in the constraint system.
3896 These variables are the distance variables. In the following
3897 initialization we declare NB_LOOPS safe variables, and the total
3898 number of variables for the constraint system is 2*NB_LOOPS. */
3899 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3900 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3901 maybe_dependent);
3902 omega_free_problem (pb);
3903
3904 /* Stop computation if not decidable, or no dependence. */
3905 if (res == false || *maybe_dependent == false)
3906 return res;
3907
3908 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3909 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3910 maybe_dependent);
3911 omega_free_problem (pb);
3912
3913 return res;
3914 }
3915
3916 /* Return true when DDR contains the same information as that stored
3917 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3918
3919 static bool
3920 ddr_consistent_p (FILE *file,
3921 struct data_dependence_relation *ddr,
3922 VEC (lambda_vector, heap) *dist_vects,
3923 VEC (lambda_vector, heap) *dir_vects)
3924 {
3925 unsigned int i, j;
3926
3927 /* If dump_file is set, output there. */
3928 if (dump_file && (dump_flags & TDF_DETAILS))
3929 file = dump_file;
3930
3931 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3932 {
3933 lambda_vector b_dist_v;
3934 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3935 VEC_length (lambda_vector, dist_vects),
3936 DDR_NUM_DIST_VECTS (ddr));
3937
3938 fprintf (file, "Banerjee dist vectors:\n");
3939 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3940 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3941
3942 fprintf (file, "Omega dist vectors:\n");
3943 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3944 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3945
3946 fprintf (file, "data dependence relation:\n");
3947 dump_data_dependence_relation (file, ddr);
3948
3949 fprintf (file, ")\n");
3950 return false;
3951 }
3952
3953 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3954 {
3955 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3956 VEC_length (lambda_vector, dir_vects),
3957 DDR_NUM_DIR_VECTS (ddr));
3958 return false;
3959 }
3960
3961 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3962 {
3963 lambda_vector a_dist_v;
3964 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3965
3966 /* Distance vectors are not ordered in the same way in the DDR
3967 and in the DIST_VECTS: search for a matching vector. */
3968 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3969 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3970 break;
3971
3972 if (j == VEC_length (lambda_vector, dist_vects))
3973 {
3974 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3975 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3976 fprintf (file, "not found in Omega dist vectors:\n");
3977 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3978 fprintf (file, "data dependence relation:\n");
3979 dump_data_dependence_relation (file, ddr);
3980 fprintf (file, ")\n");
3981 }
3982 }
3983
3984 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3985 {
3986 lambda_vector a_dir_v;
3987 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3988
3989 /* Direction vectors are not ordered in the same way in the DDR
3990 and in the DIR_VECTS: search for a matching vector. */
3991 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3992 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3993 break;
3994
3995 if (j == VEC_length (lambda_vector, dir_vects))
3996 {
3997 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3998 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3999 fprintf (file, "not found in Omega dir vectors:\n");
4000 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4001 fprintf (file, "data dependence relation:\n");
4002 dump_data_dependence_relation (file, ddr);
4003 fprintf (file, ")\n");
4004 }
4005 }
4006
4007 return true;
4008 }
4009
4010 /* This computes the affine dependence relation between A and B with
4011 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4012 independence between two accesses, while CHREC_DONT_KNOW is used
4013 for representing the unknown relation.
4014
4015 Note that it is possible to stop the computation of the dependence
4016 relation the first time we detect a CHREC_KNOWN element for a given
4017 subscript. */
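/* For instance, the accesses A[2*i] and A[2*i + 1] can never refer to
   the same element, so their relation is represented by CHREC_KNOWN;
   an access whose subscript is not affine, such as A[B[i]], cannot be
   analyzed and yields CHREC_DONT_KNOW.  */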
4018
4019 static void
4020 compute_affine_dependence (struct data_dependence_relation *ddr,
4021 struct loop *loop_nest)
4022 {
4023 struct data_reference *dra = DDR_A (ddr);
4024 struct data_reference *drb = DDR_B (ddr);
4025
4026 if (dump_file && (dump_flags & TDF_DETAILS))
4027 {
4028 fprintf (dump_file, "(compute_affine_dependence\n");
4029 fprintf (dump_file, " (stmt_a = \n");
4030 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
4031 fprintf (dump_file, ")\n (stmt_b = \n");
4032 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
4033 fprintf (dump_file, ")\n");
4034 }
4035
4036 /* Analyze only when the dependence relation is not yet known. */
4037 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
4038 && !DDR_SELF_REFERENCE (ddr))
4039 {
4040 dependence_stats.num_dependence_tests++;
4041
4042 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4043 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4044 {
4045 if (flag_check_data_deps)
4046 {
4047 /* Compute the dependences using the first algorithm. */
4048 subscript_dependence_tester (ddr, loop_nest);
4049
4050 if (dump_file && (dump_flags & TDF_DETAILS))
4051 {
4052 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4053 dump_data_dependence_relation (dump_file, ddr);
4054 }
4055
4056 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4057 {
4058 bool maybe_dependent;
4059 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4060
4061 /* Save the result of the first DD analyzer. */
4062 dist_vects = DDR_DIST_VECTS (ddr);
4063 dir_vects = DDR_DIR_VECTS (ddr);
4064
4065 /* Reset the information. */
4066 DDR_DIST_VECTS (ddr) = NULL;
4067 DDR_DIR_VECTS (ddr) = NULL;
4068
4069 /* Compute the same information using Omega. */
4070 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4071 goto csys_dont_know;
4072
4073 if (dump_file && (dump_flags & TDF_DETAILS))
4074 {
4075 fprintf (dump_file, "Omega Analyzer\n");
4076 dump_data_dependence_relation (dump_file, ddr);
4077 }
4078
4079 /* Check that we get the same information. */
4080 if (maybe_dependent)
4081 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4082 dir_vects));
4083 }
4084 }
4085 else
4086 subscript_dependence_tester (ddr, loop_nest);
4087 }
4088
4089 /* As a last case, if the dependence cannot be determined, or if
4090 the dependence is considered too difficult to determine, answer
4091 "don't know". */
4092 else
4093 {
4094 csys_dont_know:;
4095 dependence_stats.num_dependence_undetermined++;
4096
4097 if (dump_file && (dump_flags & TDF_DETAILS))
4098 {
4099 fprintf (dump_file, "Data ref a:\n");
4100 dump_data_reference (dump_file, dra);
4101 fprintf (dump_file, "Data ref b:\n");
4102 dump_data_reference (dump_file, drb);
4103 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4104 }
4105 finalize_ddr_dependent (ddr, chrec_dont_know);
4106 }
4107 }
4108
4109 if (dump_file && (dump_flags & TDF_DETAILS))
4110 fprintf (dump_file, ")\n");
4111 }
4112
4113 /* This computes the dependence relation for the same data
4114 reference into DDR. */
4115
4116 static void
4117 compute_self_dependence (struct data_dependence_relation *ddr)
4118 {
4119 unsigned int i;
4120 struct subscript *subscript;
4121
4122 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4123 return;
4124
4125 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4126 i++)
4127 {
4128 if (SUB_CONFLICTS_IN_A (subscript))
4129 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4130 if (SUB_CONFLICTS_IN_B (subscript))
4131 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4132
4133 /* The accessed index overlaps for each iteration. */
4134 SUB_CONFLICTS_IN_A (subscript)
4135 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4136 SUB_CONFLICTS_IN_B (subscript)
4137 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4138 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4139 }
4140
4141 /* The distance vector is the zero vector. */
4142 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4143 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4144 }
4145
4146 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4147 the data references in DATAREFS, in the LOOP_NEST. When
4148 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4149 relations. */
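/* Note: for N data references this creates at most one relation per
   unordered pair, i.e. N * (N - 1) / 2 relations; when
   COMPUTE_SELF_AND_RR is false, read-read pairs are skipped, and when
   it is true, N self relations are added as well.  For example, with
   COMPUTE_SELF_AND_RR true, 4 data references yield at most
   6 + 4 = 10 relations.  */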
4150
4151 void
4152 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4153 VEC (ddr_p, heap) **dependence_relations,
4154 VEC (loop_p, heap) *loop_nest,
4155 bool compute_self_and_rr)
4156 {
4157 struct data_dependence_relation *ddr;
4158 struct data_reference *a, *b;
4159 unsigned int i, j;
4160
4161 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4162 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4163 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4164 {
4165 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4166 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4167 if (loop_nest)
4168 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4169 }
4170
4171 if (compute_self_and_rr)
4172 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4173 {
4174 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4175 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4176 compute_self_dependence (ddr);
4177 }
4178 }
4179
4180 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4181 true if STMT clobbers memory, false otherwise. */
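/* For example, for the GIMPLE statements

   | t_1 = B[j_2];
   | A[i_3] = t_1;

   the first statement records a read reference to B[j_2] and the
   second a write reference to A[i_3]; operands that are SSA names
   are not memory references and are not recorded.  */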
4182
4183 bool
4184 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4185 {
4186 bool clobbers_memory = false;
4187 data_ref_loc *ref;
4188 tree *op0, *op1;
4189 enum gimple_code stmt_code = gimple_code (stmt);
4190
4191 *references = NULL;
4192
4193 /* GIMPLE_ASM and GIMPLE_CALL statements may embed arbitrary side effects.
4194 Calls have side-effects, except those to const or pure
4195 functions. */
4196 if ((stmt_code == GIMPLE_CALL
4197 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4198 || (stmt_code == GIMPLE_ASM
4199 && gimple_asm_volatile_p (stmt)))
4200 clobbers_memory = true;
4201
4202 if (!gimple_vuse (stmt))
4203 return clobbers_memory;
4204
4205 if (stmt_code == GIMPLE_ASSIGN)
4206 {
4207 tree base;
4208 op0 = gimple_assign_lhs_ptr (stmt);
4209 op1 = gimple_assign_rhs1_ptr (stmt);
4210
4211 if (DECL_P (*op1)
4212 || (REFERENCE_CLASS_P (*op1)
4213 && (base = get_base_address (*op1))
4214 && TREE_CODE (base) != SSA_NAME))
4215 {
4216 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4217 ref->pos = op1;
4218 ref->is_read = true;
4219 }
4220 }
4221 else if (stmt_code == GIMPLE_CALL)
4222 {
4223 unsigned i, n;
4224
4225 op0 = gimple_call_lhs_ptr (stmt);
4226 n = gimple_call_num_args (stmt);
4227 for (i = 0; i < n; i++)
4228 {
4229 op1 = gimple_call_arg_ptr (stmt, i);
4230
4231 if (DECL_P (*op1)
4232 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4233 {
4234 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4235 ref->pos = op1;
4236 ref->is_read = true;
4237 }
4238 }
4239 }
4240 else
4241 return clobbers_memory;
4242
4243 if (*op0
4244 && (DECL_P (*op0)
4245 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4246 {
4247 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4248 ref->pos = op0;
4249 ref->is_read = false;
4250 }
4251 return clobbers_memory;
4252 }
4253
4254 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4255 reference, returns false, otherwise returns true. NEST is the outermost
4256 loop of the loop nest in which the references should be analyzed. */
4257
4258 bool
4259 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4260 VEC (data_reference_p, heap) **datarefs)
4261 {
4262 unsigned i;
4263 VEC (data_ref_loc, heap) *references;
4264 data_ref_loc *ref;
4265 bool ret = true;
4266 data_reference_p dr;
4267
4268 if (get_references_in_stmt (stmt, &references))
4269 {
4270 VEC_free (data_ref_loc, heap, references);
4271 return false;
4272 }
4273
4274 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4275 {
4276 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4277 *ref->pos, stmt, ref->is_read);
4278 gcc_assert (dr != NULL);
4279 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4280 }
4281 VEC_free (data_ref_loc, heap, references);
4282 return ret;
4283 }
4284
4285 /* Stores the data references in STMT to DATAREFS. If there is an
4286 unanalyzable reference, returns false, otherwise returns true.
4287 NEST is the outermost loop of the loop nest in which the references
4288 should be instantiated, LOOP is the loop in which the references
4289 should be analyzed. */
4290
4291 bool
4292 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4293 VEC (data_reference_p, heap) **datarefs)
4294 {
4295 unsigned i;
4296 VEC (data_ref_loc, heap) *references;
4297 data_ref_loc *ref;
4298 bool ret = true;
4299 data_reference_p dr;
4300
4301 if (get_references_in_stmt (stmt, &references))
4302 {
4303 VEC_free (data_ref_loc, heap, references);
4304 return false;
4305 }
4306
4307 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4308 {
4309 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4310 gcc_assert (dr != NULL);
4311 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4312 }
4313
4314 VEC_free (data_ref_loc, heap, references);
4315 return ret;
4316 }
4317
4318 /* Search the data references in the basic block BB, and record the
4319 information into DATAREFS. Returns chrec_dont_know when failing to
4320 analyze a difficult case, returns NULL_TREE otherwise. */
4321
4322 tree
4323 find_data_references_in_bb (struct loop *loop, basic_block bb,
4324 VEC (data_reference_p, heap) **datarefs)
4325 {
4326 gimple_stmt_iterator bsi;
4327
4328 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4329 {
4330 gimple stmt = gsi_stmt (bsi);
4331
4332 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4333 {
4334 struct data_reference *res;
4335 res = XCNEW (struct data_reference);
4336 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4337
4338 return chrec_dont_know;
4339 }
4340 }
4341
4342 return NULL_TREE;
4343 }
4344
4345 /* Search the data references in LOOP, and record the information into
4346 DATAREFS. Returns chrec_dont_know when failing to analyze a
4347 difficult case, returns NULL_TREE otherwise.
4348
4349 TODO: This function should be made smarter so that it can handle address
4350 arithmetic as if it were an array access, etc. */
4351
4352 tree
4353 find_data_references_in_loop (struct loop *loop,
4354 VEC (data_reference_p, heap) **datarefs)
4355 {
4356 basic_block bb, *bbs;
4357 unsigned int i;
4358
4359 bbs = get_loop_body_in_dom_order (loop);
4360
4361 for (i = 0; i < loop->num_nodes; i++)
4362 {
4363 bb = bbs[i];
4364
4365 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4366 {
4367 free (bbs);
4368 return chrec_dont_know;
4369 }
4370 }
4371 free (bbs);
4372
4373 return NULL_TREE;
4374 }
4375
4376 /* Recursive helper function for find_loop_nest. */
4377
4378 static bool
4379 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4380 {
4381 /* Inner loops of the nest should not contain siblings. Example:
4382 when there are two consecutive loops,
4383
4384 | loop_0
4385 | loop_1
4386 | A[{0, +, 1}_1]
4387 | endloop_1
4388 | loop_2
4389 | A[{0, +, 1}_2]
4390 | endloop_2
4391 | endloop_0
4392
4393 the dependence relation cannot be captured by the distance
4394 abstraction. */
4395 if (loop->next)
4396 return false;
4397
4398 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4399 if (loop->inner)
4400 return find_loop_nest_1 (loop->inner, loop_nest);
4401 return true;
4402 }
4403
4404 /* Return false when the LOOP is not well nested. Otherwise return
4405 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4406 contain the loops from the outermost to the innermost, as they will
4407 appear in the classic distance vector. */
4408
4409 bool
4410 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4411 {
4412 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4413 if (loop->inner)
4414 return find_loop_nest_1 (loop->inner, loop_nest);
4415 return true;
4416 }
4417
4418 /* Returns true when the data dependences have been computed, false otherwise.
4419 Given a loop nest LOOP, the following vectors are returned:
4420 DATAREFS is initialized to all the data references contained in this loop,
4421 DEPENDENCE_RELATIONS contains the relations between the data references.
4422 Compute read-read and self relations if
4423 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
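/* A minimal usage sketch, mirroring analyze_all_data_dependences
   further below (the caller owns and must release all three vectors;
   the variable names are only illustrative):

   | VEC (loop_p, heap) *nest = VEC_alloc (loop_p, heap, 3);
   | VEC (data_reference_p, heap) *drs = VEC_alloc (data_reference_p, heap, 10);
   | VEC (ddr_p, heap) *ddrs = VEC_alloc (ddr_p, heap, 10);
   |
   | if (compute_data_dependences_for_loop (loop, false, &nest, &drs, &ddrs))
   |   {
   |     ... inspect the relations in DDRS ...
   |   }
   |
   | VEC_free (loop_p, heap, nest);
   | free_dependence_relations (ddrs);
   | free_data_refs (drs);
   */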
4424
4425 bool
4426 compute_data_dependences_for_loop (struct loop *loop,
4427 bool compute_self_and_read_read_dependences,
4428 VEC (loop_p, heap) **loop_nest,
4429 VEC (data_reference_p, heap) **datarefs,
4430 VEC (ddr_p, heap) **dependence_relations)
4431 {
4432 bool res = true;
4433
4434 memset (&dependence_stats, 0, sizeof (dependence_stats));
4435
4436 /* If the loop nest is not well formed, or one of the data references
4437 is not computable, give up without spending time to compute other
4438 dependences. */
4439 if (!loop
4440 || !find_loop_nest (loop, loop_nest)
4441 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4442 {
4443 struct data_dependence_relation *ddr;
4444
4445 /* Insert a single relation into dependence_relations:
4446 chrec_dont_know. */
4447 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4448 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4449 res = false;
4450 }
4451 else
4452 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4453 compute_self_and_read_read_dependences);
4454
4455 if (dump_file && (dump_flags & TDF_STATS))
4456 {
4457 fprintf (dump_file, "Dependence tester statistics:\n");
4458
4459 fprintf (dump_file, "Number of dependence tests: %d\n",
4460 dependence_stats.num_dependence_tests);
4461 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4462 dependence_stats.num_dependence_dependent);
4463 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4464 dependence_stats.num_dependence_independent);
4465 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4466 dependence_stats.num_dependence_undetermined);
4467
4468 fprintf (dump_file, "Number of subscript tests: %d\n",
4469 dependence_stats.num_subscript_tests);
4470 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4471 dependence_stats.num_subscript_undetermined);
4472 fprintf (dump_file, "Number of same subscript function: %d\n",
4473 dependence_stats.num_same_subscript_function);
4474
4475 fprintf (dump_file, "Number of ziv tests: %d\n",
4476 dependence_stats.num_ziv);
4477 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4478 dependence_stats.num_ziv_dependent);
4479 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4480 dependence_stats.num_ziv_independent);
4481 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4482 dependence_stats.num_ziv_unimplemented);
4483
4484 fprintf (dump_file, "Number of siv tests: %d\n",
4485 dependence_stats.num_siv);
4486 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4487 dependence_stats.num_siv_dependent);
4488 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4489 dependence_stats.num_siv_independent);
4490 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4491 dependence_stats.num_siv_unimplemented);
4492
4493 fprintf (dump_file, "Number of miv tests: %d\n",
4494 dependence_stats.num_miv);
4495 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4496 dependence_stats.num_miv_dependent);
4497 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4498 dependence_stats.num_miv_independent);
4499 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4500 dependence_stats.num_miv_unimplemented);
4501 }
4502
4503 return res;
4504 }
4505
4506 /* Returns true when the data dependences for the basic block BB have been
4507 computed, false otherwise.
4508 DATAREFS is initialized to all the data references contained in this basic
4509 block, DEPENDENCE_RELATIONS contains the relations between the data
4510 references. Compute read-read and self relations if
4511 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4512 bool
4513 compute_data_dependences_for_bb (basic_block bb,
4514 bool compute_self_and_read_read_dependences,
4515 VEC (data_reference_p, heap) **datarefs,
4516 VEC (ddr_p, heap) **dependence_relations)
4517 {
4518 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4519 return false;
4520
4521 compute_all_dependences (*datarefs, dependence_relations, NULL,
4522 compute_self_and_read_read_dependences);
4523 return true;
4524 }
4525
4526 /* Entry point (for testing only). Analyze all the data references
4527 and the dependence relations in LOOP.
4528
4529 The data references are computed first.
4530
4531 A relation on these nodes is represented by a complete graph. Some
4532 of the relations could be of no interest, thus the relations can be
4533 computed on demand.
4534
4535 In the following function we compute all the relations. This is
4536 just a first implementation that is here:
4537 - for showing how to ask for the dependence relations,
4538 - for debugging the whole dependence graph,
4539 - for the dejagnu testcases and maintenance.
4540
4541 It is possible to ask only for a part of the graph, avoiding the
4542 computation of the whole dependence graph. The computed dependences are
4543 stored in a knowledge base (KB) such that later queries don't
4544 recompute the same information. The implementation of this KB is
4545 transparent to the optimizer, and thus the KB can be changed with a
4546 more efficient implementation, or the KB could be disabled. */
4547 static void
4548 analyze_all_data_dependences (struct loop *loop)
4549 {
4550 unsigned int i;
4551 int nb_data_refs = 10;
4552 VEC (data_reference_p, heap) *datarefs =
4553 VEC_alloc (data_reference_p, heap, nb_data_refs);
4554 VEC (ddr_p, heap) *dependence_relations =
4555 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4556 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4557
4558 /* Compute DDs on the whole function. */
4559 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4560 &dependence_relations);
4561
4562 if (dump_file)
4563 {
4564 dump_data_dependence_relations (dump_file, dependence_relations);
4565 fprintf (dump_file, "\n\n");
4566
4567 if (dump_flags & TDF_DETAILS)
4568 dump_dist_dir_vectors (dump_file, dependence_relations);
4569
4570 if (dump_flags & TDF_STATS)
4571 {
4572 unsigned nb_top_relations = 0;
4573 unsigned nb_bot_relations = 0;
4574 unsigned nb_chrec_relations = 0;
4575 struct data_dependence_relation *ddr;
4576
4577 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4578 {
4579 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4580 nb_top_relations++;
4581
4582 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4583 nb_bot_relations++;
4584
4585 else
4586 nb_chrec_relations++;
4587 }
4588
4589 gather_stats_on_scev_database ();
4590 }
4591 }
4592
4593 VEC_free (loop_p, heap, loop_nest);
4594 free_dependence_relations (dependence_relations);
4595 free_data_refs (datarefs);
4596 }
4597
4598 /* Computes all the data dependences and checks that the results of
4599 several analyzers are the same. */
4600
4601 void
4602 tree_check_data_deps (void)
4603 {
4604 loop_iterator li;
4605 struct loop *loop_nest;
4606
4607 FOR_EACH_LOOP (li, loop_nest, 0)
4608 analyze_all_data_dependences (loop_nest);
4609 }
4610
4611 /* Free the memory used by a data dependence relation DDR. */
4612
4613 void
4614 free_dependence_relation (struct data_dependence_relation *ddr)
4615 {
4616 if (ddr == NULL)
4617 return;
4618
4619 if (DDR_SUBSCRIPTS (ddr))
4620 free_subscripts (DDR_SUBSCRIPTS (ddr));
4621 if (DDR_DIST_VECTS (ddr))
4622 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4623 if (DDR_DIR_VECTS (ddr))
4624 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4625
4626 free (ddr);
4627 }
4628
4629 /* Free the memory used by the data dependence relations from
4630 DEPENDENCE_RELATIONS. */
4631
4632 void
4633 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4634 {
4635 unsigned int i;
4636 struct data_dependence_relation *ddr;
4637
4638 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4639 if (ddr)
4640 free_dependence_relation (ddr);
4641
4642 VEC_free (ddr_p, heap, dependence_relations);
4643 }
4644
4645 /* Free the memory used by the data references from DATAREFS. */
4646
4647 void
4648 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4649 {
4650 unsigned int i;
4651 struct data_reference *dr;
4652
4653 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4654 free_data_ref (dr);
4655 VEC_free (data_reference_p, heap, datarefs);
4656 }
4657
4658 \f
4659
4660 /* Dump vertex I in RDG to FILE. */
4661
4662 void
4663 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4664 {
4665 struct vertex *v = &(rdg->vertices[i]);
4666 struct graph_edge *e;
4667
4668 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4669 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4670 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4671
4672 if (v->pred)
4673 for (e = v->pred; e; e = e->pred_next)
4674 fprintf (file, " %d", e->src);
4675
4676 fprintf (file, ") (out:");
4677
4678 if (v->succ)
4679 for (e = v->succ; e; e = e->succ_next)
4680 fprintf (file, " %d", e->dest);
4681
4682 fprintf (file, ")\n");
4683 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4684 fprintf (file, ")\n");
4685 }
4686
4687 /* Call dump_rdg_vertex on stderr. */
4688
4689 DEBUG_FUNCTION void
4690 debug_rdg_vertex (struct graph *rdg, int i)
4691 {
4692 dump_rdg_vertex (stderr, rdg, i);
4693 }
4694
4695 /* Dump component C of RDG to FILE. If DUMPED is non-null, record the
4696 dumped vertices in that bitmap. */
4697
4698 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4699 {
4700 int i;
4701
4702 fprintf (file, "(%d\n", c);
4703
4704 for (i = 0; i < rdg->n_vertices; i++)
4705 if (rdg->vertices[i].component == c)
4706 {
4707 if (dumped)
4708 bitmap_set_bit (dumped, i);
4709
4710 dump_rdg_vertex (file, rdg, i);
4711 }
4712
4713 fprintf (file, ")\n");
4714 }
4715
4716 /* Call dump_rdg_component on stderr. */
4717
4718 DEBUG_FUNCTION void
4719 debug_rdg_component (struct graph *rdg, int c)
4720 {
4721 dump_rdg_component (stderr, rdg, c, NULL);
4722 }
4723
4724 /* Dump the reduced dependence graph RDG to FILE. */
4725
4726 void
4727 dump_rdg (FILE *file, struct graph *rdg)
4728 {
4729 int i;
4730 bitmap dumped = BITMAP_ALLOC (NULL);
4731
4732 fprintf (file, "(rdg\n");
4733
4734 for (i = 0; i < rdg->n_vertices; i++)
4735 if (!bitmap_bit_p (dumped, i))
4736 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4737
4738 fprintf (file, ")\n");
4739 BITMAP_FREE (dumped);
4740 }
4741
4742 /* Call dump_rdg on stderr. */
4743
4744 DEBUG_FUNCTION void
4745 debug_rdg (struct graph *rdg)
4746 {
4747 dump_rdg (stderr, rdg);
4748 }
4749
4750 static void
4751 dot_rdg_1 (FILE *file, struct graph *rdg)
4752 {
4753 int i;
4754
4755 fprintf (file, "digraph RDG {\n");
4756
4757 for (i = 0; i < rdg->n_vertices; i++)
4758 {
4759 struct vertex *v = &(rdg->vertices[i]);
4760 struct graph_edge *e;
4761
4762 /* Highlight reads from memory. */
4763 if (RDG_MEM_READS_STMT (rdg, i))
4764 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4765
4766 /* Highlight stores to memory. */
4767 if (RDG_MEM_WRITE_STMT (rdg, i))
4768 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4769
4770 if (v->succ)
4771 for (e = v->succ; e; e = e->succ_next)
4772 switch (RDGE_TYPE (e))
4773 {
4774 case input_dd:
4775 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4776 break;
4777
4778 case output_dd:
4779 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4780 break;
4781
4782 case flow_dd:
4783 /* These are the most common dependences: don't print these. */
4784 fprintf (file, "%d -> %d \n", i, e->dest);
4785 break;
4786
4787 case anti_dd:
4788 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4789 break;
4790
4791 default:
4792 gcc_unreachable ();
4793 }
4794 }
4795
4796 fprintf (file, "}\n\n");
4797 }
4798
4799 /* Display the Reduced Dependence Graph using dotty. */
4800 extern void dot_rdg (struct graph *);
4801
4802 DEBUG_FUNCTION void
4803 dot_rdg (struct graph *rdg)
4804 {
4805 /* When debugging, enable the following code. This cannot be used
4806 in production compilers because it calls "system". */
4807 #if 0
4808 FILE *file = fopen ("/tmp/rdg.dot", "w");
4809 gcc_assert (file != NULL);
4810
4811 dot_rdg_1 (file, rdg);
4812 fclose (file);
4813
4814 system ("dotty /tmp/rdg.dot &");
4815 #else
4816 dot_rdg_1 (stderr, rdg);
4817 #endif
4818 }
4819
4820 /* This structure is used for recording the mapping between a statement
4821 and its index in the RDG. */
4822
4823 struct GTY(()) rdg_vertex_info
4824 {
4825 gimple stmt;
4826 int index;
4827 };
4828
4829 /* Returns the index of STMT in RDG. */
4830
4831 int
4832 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4833 {
4834 struct rdg_vertex_info rvi, *slot;
4835
4836 rvi.stmt = stmt;
4837 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4838
4839 if (!slot)
4840 return -1;
4841
4842 return slot->index;
4843 }
4844
4845 /* Creates an edge in RDG for each distance vector from DDR. The
4846 order that we keep track of in the RDG is the order in which
4847 statements have to be executed. */
4848
4849 static void
4850 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4851 {
4852 struct graph_edge *e;
4853 int va, vb;
4854 data_reference_p dra = DDR_A (ddr);
4855 data_reference_p drb = DDR_B (ddr);
4856 unsigned level = ddr_dependence_level (ddr);
4857
4858 /* For non-scalar dependences, when the dependence is REVERSED,
4859 statement B has to be executed before statement A. */
4860 if (level > 0
4861 && !DDR_REVERSED_P (ddr))
4862 {
4863 data_reference_p tmp = dra;
4864 dra = drb;
4865 drb = tmp;
4866 }
4867
4868 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4869 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4870
4871 if (va < 0 || vb < 0)
4872 return;
4873
4874 e = add_edge (rdg, va, vb);
4875 e->data = XNEW (struct rdg_edge);
4876
4877 RDGE_LEVEL (e) = level;
4878 RDGE_RELATION (e) = ddr;
4879
4880 /* Determines the type of the data dependence. */
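/* Following the classic terminology: a read after a read is an input
   dependence, a write after a write an output dependence, a write
   followed by a read a flow (true) dependence, and a read followed
   by a write an anti dependence.  */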
4881 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4882 RDGE_TYPE (e) = input_dd;
4883 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4884 RDGE_TYPE (e) = output_dd;
4885 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4886 RDGE_TYPE (e) = flow_dd;
4887 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4888 RDGE_TYPE (e) = anti_dd;
4889 }
4890
4891 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4892 the index of DEF in RDG. */
4893
4894 static void
4895 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4896 {
4897 use_operand_p imm_use_p;
4898 imm_use_iterator iterator;
4899
4900 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4901 {
4902 struct graph_edge *e;
4903 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4904
4905 if (use < 0)
4906 continue;
4907
4908 e = add_edge (rdg, idef, use);
4909 e->data = XNEW (struct rdg_edge);
4910 RDGE_TYPE (e) = flow_dd;
4911 RDGE_RELATION (e) = NULL;
4912 }
4913 }
4914
4915 /* Creates the edges of the reduced dependence graph RDG. */
4916
4917 static void
4918 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4919 {
4920 int i;
4921 struct data_dependence_relation *ddr;
4922 def_operand_p def_p;
4923 ssa_op_iter iter;
4924
4925 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4926 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4927 create_rdg_edge_for_ddr (rdg, ddr);
4928
4929 for (i = 0; i < rdg->n_vertices; i++)
4930 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4931 iter, SSA_OP_DEF)
4932 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4933 }
4934
4935 /* Build the vertices of the reduced dependence graph RDG. */
4936
4937 void
4938 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4939 {
4940 int i, j;
4941 gimple stmt;
4942
4943 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4944 {
4945 VEC (data_ref_loc, heap) *references;
4946 data_ref_loc *ref;
4947 struct vertex *v = &(rdg->vertices[i]);
4948 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4949 struct rdg_vertex_info **slot;
4950
4951 rvi->stmt = stmt;
4952 rvi->index = i;
4953 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4954
4955 if (!*slot)
4956 *slot = rvi;
4957 else
4958 free (rvi);
4959
4960 v->data = XNEW (struct rdg_vertex);
4961 RDG_STMT (rdg, i) = stmt;
4962
4963 RDG_MEM_WRITE_STMT (rdg, i) = false;
4964 RDG_MEM_READS_STMT (rdg, i) = false;
4965 if (gimple_code (stmt) == GIMPLE_PHI)
4966 continue;
4967
4968 get_references_in_stmt (stmt, &references);
4969 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4970 if (!ref->is_read)
4971 RDG_MEM_WRITE_STMT (rdg, i) = true;
4972 else
4973 RDG_MEM_READS_STMT (rdg, i) = true;
4974
4975 VEC_free (data_ref_loc, heap, references);
4976 }
4977 }
4978
4979 /* Initialize STMTS with all the statements of LOOP, including the
4980 PHI nodes. The order in which we discover statements is important,
4981 as generate_loops_for_partition uses the same traversal for
4982 identifying statements. */
4984
4985 static void
4986 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4987 {
4988 unsigned int i;
4989 basic_block *bbs = get_loop_body_in_dom_order (loop);
4990
4991 for (i = 0; i < loop->num_nodes; i++)
4992 {
4993 basic_block bb = bbs[i];
4994 gimple_stmt_iterator bsi;
4995 gimple stmt;
4996
4997 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4998 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4999
5000 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5001 {
5002 stmt = gsi_stmt (bsi);
5003 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
5004 VEC_safe_push (gimple, heap, *stmts, stmt);
5005 }
5006 }
5007
5008 free (bbs);
5009 }
5010
5011 /* Returns true when all the dependences are computable. */
5012
5013 static bool
5014 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5015 {
5016 ddr_p ddr;
5017 unsigned int i;
5018
5019 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5020 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5021 return false;
5022
5023 return true;
5024 }
5025
5026 /* Computes a hash function for element ELT. */
5027
5028 static hashval_t
5029 hash_stmt_vertex_info (const void *elt)
5030 {
5031 const struct rdg_vertex_info *const rvi =
5032 (const struct rdg_vertex_info *) elt;
5033 gimple stmt = rvi->stmt;
5034
5035 return htab_hash_pointer (stmt);
5036 }
5037
5038 /* Compares database elements E1 and E2. */
5039
5040 static int
5041 eq_stmt_vertex_info (const void *e1, const void *e2)
5042 {
5043 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5044 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5045
5046 return elt1->stmt == elt2->stmt;
5047 }
5048
5049 /* Free the element E. */
5050
5051 static void
5052 hash_stmt_vertex_del (void *e)
5053 {
5054 free (e);
5055 }
5056
5057 /* Allocate an empty Reduced Dependence Graph (RDG) with N_STMTS
5058 vertices, together with an empty hash table mapping each statement
5059 to its vertex index. */
5060
5061 struct graph *
5062 build_empty_rdg (int n_stmts)
5063 {
5064 int nb_data_refs = 10;
5065 struct graph *rdg = new_graph (n_stmts);
5066
5067 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5068 eq_stmt_vertex_info, hash_stmt_vertex_del);
5069 return rdg;
5070 }
5071
5072 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5073 statement of the loop nest, and one edge per data dependence or
5074 scalar dependence. */
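/* A rough usage sketch (the caller owns the vectors filled in by this
   function and must release both the graph and the dependence
   information; the variable names are only illustrative):

   | VEC (loop_p, heap) *nest = VEC_alloc (loop_p, heap, 3);
   | VEC (ddr_p, heap) *ddrs = VEC_alloc (ddr_p, heap, 10);
   | VEC (data_reference_p, heap) *drs = VEC_alloc (data_reference_p, heap, 10);
   | struct graph *rdg = build_rdg (loop, &nest, &ddrs, &drs);
   |
   | if (rdg)
   |   {
   |     ... inspect the vertices and edges of RDG ...
   |     free_rdg (rdg);
   |   }
   |
   | free_dependence_relations (ddrs);
   | free_data_refs (drs);
   | VEC_free (loop_p, heap, nest);
   */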
5075
5076 struct graph *
5077 build_rdg (struct loop *loop,
5078 VEC (loop_p, heap) **loop_nest,
5079 VEC (ddr_p, heap) **dependence_relations,
5080 VEC (data_reference_p, heap) **datarefs)
5081 {
5082 struct graph *rdg = NULL;
5083 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5084
5085 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5086 dependence_relations);
5087
5088 if (known_dependences_p (*dependence_relations))
5089 {
5090 stmts_from_loop (loop, &stmts);
5091 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5092 create_rdg_vertices (rdg, stmts);
5093 create_rdg_edges (rdg, *dependence_relations);
5094 }
5095
5096 VEC_free (gimple, heap, stmts);
5097 return rdg;
5098 }
5099
5100 /* Free the reduced dependence graph RDG. */
5101
5102 void
5103 free_rdg (struct graph *rdg)
5104 {
5105 int i;
5106
5107 for (i = 0; i < rdg->n_vertices; i++)
5108 {
5109 struct vertex *v = &(rdg->vertices[i]);
5110 struct graph_edge *e;
5111
5112 for (e = v->succ; e; e = e->succ_next)
5113 free (e->data);
5114
5115 free (v->data);
5116 }
5117
5118 htab_delete (rdg->indices);
5119 free_graph (rdg);
5120 }
5121
5122 /* Initialize STMTS with all the statements of LOOP that contain a
5123 store to memory. */
5124
5125 void
5126 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5127 {
5128 unsigned int i;
5129 basic_block *bbs = get_loop_body_in_dom_order (loop);
5130
5131 for (i = 0; i < loop->num_nodes; i++)
5132 {
5133 basic_block bb = bbs[i];
5134 gimple_stmt_iterator bsi;
5135
5136 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5137 if (gimple_vdef (gsi_stmt (bsi)))
5138 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5139 }
5140
5141 free (bbs);
5142 }
5143
5144 /* Returns true when the statement STMT is of the form "A[i] = 0" and
5145 the data reference on its LHS has a stride equal to the size of its
5146 unit type. */
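/* For example, with "int A[N]", the store in

   | for (i = 0; i < N; i++)
   |   A[i] = 0;

   qualifies: its stride is sizeof (int), the size of the accessed
   unit type.  A store such as "A[2*i] = 0" does not, since its
   stride is twice the unit size.  */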
5147
5148 bool
5149 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5150 {
5151 tree op0, op1;
5152 bool res;
5153 struct data_reference *dr;
5154
5155 if (!stmt
5156 || !gimple_vdef (stmt)
5157 || !is_gimple_assign (stmt)
5158 || !gimple_assign_single_p (stmt)
5159 || !(op1 = gimple_assign_rhs1 (stmt))
5160 || !(integer_zerop (op1) || real_zerop (op1)))
5161 return false;
5162
5163 dr = XCNEW (struct data_reference);
5164 op0 = gimple_assign_lhs (stmt);
5165
5166 DR_STMT (dr) = stmt;
5167 DR_REF (dr) = op0;
5168
5169 res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
5170 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5171
5172 free_data_ref (dr);
5173 return res;
5174 }
5175
5176 /* Initialize STMTS with all the statements of LOOP that contain a
5177 store to memory of the form "A[i] = 0". */
5178
5179 void
5180 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5181 {
5182 unsigned int i;
5183 basic_block bb;
5184 gimple_stmt_iterator si;
5185 gimple stmt;
5186 basic_block *bbs = get_loop_body_in_dom_order (loop);
5187
5188 for (i = 0; i < loop->num_nodes; i++)
5189 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5190 if ((stmt = gsi_stmt (si))
5191 && stmt_with_adjacent_zero_store_dr_p (stmt))
5192 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5193
5194 free (bbs);
5195 }
5196
5197 /* For a data reference REF, return the declaration of its base
5198 address or NULL_TREE if the base is not determined. */
5199
5200 static inline tree
5201 ref_base_address (gimple stmt, data_ref_loc *ref)
5202 {
5203 tree base = NULL_TREE;
5204 tree base_address;
5205 struct data_reference *dr = XCNEW (struct data_reference);
5206
5207 DR_STMT (dr) = stmt;
5208 DR_REF (dr) = *ref->pos;
5209 dr_analyze_innermost (dr, loop_containing_stmt (stmt));
5210 base_address = DR_BASE_ADDRESS (dr);
5211
5212 if (!base_address)
5213 goto end;
5214
5215 switch (TREE_CODE (base_address))
5216 {
5217 case ADDR_EXPR:
5218 base = TREE_OPERAND (base_address, 0);
5219 break;
5220
5221 default:
5222 base = base_address;
5223 break;
5224 }
5225
5226 end:
5227 free_data_ref (dr);
5228 return base;
5229 }
5230
5231 /* Determines whether the statement from vertex V of the RDG has a
5232 definition used outside the loop that contains this statement. */
5233
5234 bool
5235 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5236 {
5237 gimple stmt = RDG_STMT (rdg, v);
5238 struct loop *loop = loop_containing_stmt (stmt);
5239 use_operand_p imm_use_p;
5240 imm_use_iterator iterator;
5241 ssa_op_iter it;
5242 def_operand_p def_p;
5243
5244 if (!loop)
5245 return true;
5246
5247 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5248 {
5249 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5250 {
5251 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5252 return true;
5253 }
5254 }
5255
5256 return false;
5257 }
5258
5259 /* Determines whether statements S1 and S2 access similar memory
5260 locations. Two memory accesses are considered similar when they
5261 have the same base address declaration, i.e. when their
5262 ref_base_address is the same. */
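/* For example, A[0] and A[i + 1] are considered similar because both
   are based on the declaration of A, whereas A[i] and B[i] are not,
   as their base declarations differ.  */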
5263
5264 bool
5265 have_similar_memory_accesses (gimple s1, gimple s2)
5266 {
5267 bool res = false;
5268 unsigned i, j;
5269 VEC (data_ref_loc, heap) *refs1, *refs2;
5270 data_ref_loc *ref1, *ref2;
5271
5272 get_references_in_stmt (s1, &refs1);
5273 get_references_in_stmt (s2, &refs2);
5274
5275 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5276 {
5277 tree base1 = ref_base_address (s1, ref1);
5278
5279 if (base1)
5280 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5281 if (base1 == ref_base_address (s2, ref2))
5282 {
5283 res = true;
5284 goto end;
5285 }
5286 }
5287
5288 end:
5289 VEC_free (data_ref_loc, heap, refs1);
5290 VEC_free (data_ref_loc, heap, refs2);
5291 return res;
5292 }
5293
5294 /* Helper function for the hashtab. */
5295
5296 static int
5297 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5298 {
5299 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5300 CONST_CAST_GIMPLE ((const_gimple) s2));
5301 }
5302
5303 /* Helper function for the hashtab. */
5304
5305 static hashval_t
5306 ref_base_address_1 (const void *s)
5307 {
5308 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5309 unsigned i;
5310 VEC (data_ref_loc, heap) *refs;
5311 data_ref_loc *ref;
5312 hashval_t res = 0;
5313
5314 get_references_in_stmt (stmt, &refs);
5315
5316 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5317 if (!ref->is_read)
5318 {
5319 res = htab_hash_pointer (ref_base_address (stmt, ref));
5320 break;
5321 }
5322
5323 VEC_free (data_ref_loc, heap, refs);
5324 return res;
5325 }
5326
5327 /* Try to remove duplicated write data references from STMTS. */
5328
5329 void
5330 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5331 {
5332 unsigned i;
5333 gimple stmt;
5334 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5335 have_similar_memory_accesses_1, NULL);
5336
5337 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5338 {
5339 void **slot;
5340
5341 slot = htab_find_slot (seen, stmt, INSERT);
5342
5343 if (*slot)
5344 VEC_ordered_remove (gimple, *stmts, i);
5345 else
5346 {
5347 *slot = (void *) stmt;
5348 i++;
5349 }
5350 }
5351
5352 htab_delete (seen);
5353 }
5354
5355 /* Returns the index of PARAMETER in the parameters vector of the
5356 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5357
5358 int
5359 access_matrix_get_index_for_parameter (tree parameter,
5360 struct access_matrix *access_matrix)
5361 {
5362 int i;
5363 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5364 tree lambda_parameter;
5365
5366 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5367 if (lambda_parameter == parameter)
5368 return i + AM_NB_INDUCTION_VARS (access_matrix);
5369
5370 return -1;
5371 }