re PR tree-optimization/50789 (Gather vectorization)
[gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
31
32 The goals of this analysis are:
33
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
37
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
40
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
46
47 - to define a knowledge base for storing the data dependence
48 information,
49
50 - to define an interface to access this data.
51
52
53 Definitions:
54
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
64
65 References:
66
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
73
74
75 */
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
82 #include "cfgloop.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
88
89 static struct datadep_stats
90 {
91 int num_dependence_tests;
92 int num_dependence_dependent;
93 int num_dependence_independent;
94 int num_dependence_undetermined;
95
96 int num_subscript_tests;
97 int num_subscript_undetermined;
98 int num_same_subscript_function;
99
100 int num_ziv;
101 int num_ziv_independent;
102 int num_ziv_dependent;
103 int num_ziv_unimplemented;
104
105 int num_siv;
106 int num_siv_independent;
107 int num_siv_dependent;
108 int num_siv_unimplemented;
109
110 int num_miv;
111 int num_miv_independent;
112 int num_miv_dependent;
113 int num_miv_unimplemented;
114 } dependence_stats;
115
116 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
117 struct data_reference *,
118 struct data_reference *,
119 struct loop *);
120 /* Returns true iff A divides B. */
121
122 static inline bool
123 tree_fold_divides_p (const_tree a, const_tree b)
124 {
125 gcc_assert (TREE_CODE (a) == INTEGER_CST);
126 gcc_assert (TREE_CODE (b) == INTEGER_CST);
127 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
128 }
129
130 /* Returns true iff A divides B. */
131
132 static inline bool
133 int_divides_p (int a, int b)
134 {
135 return ((b % a) == 0);
136 }
137
138 \f
139
140 /* Dump into FILE all the data references from DATAREFS. */
141
142 void
143 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
144 {
145 unsigned int i;
146 struct data_reference *dr;
147
148 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
149 dump_data_reference (file, dr);
150 }
151
152 /* Dump into STDERR all the data references from DATAREFS. */
153
154 DEBUG_FUNCTION void
155 debug_data_references (VEC (data_reference_p, heap) *datarefs)
156 {
157 dump_data_references (stderr, datarefs);
158 }
159
160 /* Dump to STDERR all the dependence relations from DDRS. */
161
162 DEBUG_FUNCTION void
163 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
164 {
165 dump_data_dependence_relations (stderr, ddrs);
166 }
167
168 /* Dump into FILE all the dependence relations from DDRS. */
169
170 void
171 dump_data_dependence_relations (FILE *file,
172 VEC (ddr_p, heap) *ddrs)
173 {
174 unsigned int i;
175 struct data_dependence_relation *ddr;
176
177 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
178 dump_data_dependence_relation (file, ddr);
179 }
180
181 /* Print to STDERR the data_reference DR. */
182
183 DEBUG_FUNCTION void
184 debug_data_reference (struct data_reference *dr)
185 {
186 dump_data_reference (stderr, dr);
187 }
188
189 /* Dump function for a DATA_REFERENCE structure. */
190
191 void
192 dump_data_reference (FILE *outf,
193 struct data_reference *dr)
194 {
195 unsigned int i;
196
197 fprintf (outf, "#(Data Ref: \n");
198 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
199 fprintf (outf, "# stmt: ");
200 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
201 fprintf (outf, "# ref: ");
202 print_generic_stmt (outf, DR_REF (dr), 0);
203 fprintf (outf, "# base_object: ");
204 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
205
206 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
207 {
208 fprintf (outf, "# Access function %d: ", i);
209 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
210 }
211 fprintf (outf, "#)\n");
212 }
213
214 /* Dumps the affine function described by FN to the file OUTF. */
215
216 static void
217 dump_affine_function (FILE *outf, affine_fn fn)
218 {
219 unsigned i;
220 tree coef;
221
222 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
223 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
224 {
225 fprintf (outf, " + ");
226 print_generic_expr (outf, coef, TDF_SLIM);
227 fprintf (outf, " * x_%u", i);
228 }
229 }
230
231 /* Dumps the conflict function CF to the file OUTF. */
232
233 static void
234 dump_conflict_function (FILE *outf, conflict_function *cf)
235 {
236 unsigned i;
237
238 if (cf->n == NO_DEPENDENCE)
239 fprintf (outf, "no dependence\n");
240 else if (cf->n == NOT_KNOWN)
241 fprintf (outf, "not known\n");
242 else
243 {
244 for (i = 0; i < cf->n; i++)
245 {
246 fprintf (outf, "[");
247 dump_affine_function (outf, cf->fns[i]);
248 fprintf (outf, "]\n");
249 }
250 }
251 }
252
253 /* Dump function for a SUBSCRIPT structure. */
254
255 void
256 dump_subscript (FILE *outf, struct subscript *subscript)
257 {
258 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
259
260 fprintf (outf, "\n (subscript \n");
261 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
262 dump_conflict_function (outf, cf);
263 if (CF_NONTRIVIAL_P (cf))
264 {
265 tree last_iteration = SUB_LAST_CONFLICT (subscript);
266 fprintf (outf, " last_conflict: ");
267 print_generic_stmt (outf, last_iteration, 0);
268 }
269
270 cf = SUB_CONFLICTS_IN_B (subscript);
271 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
272 dump_conflict_function (outf, cf);
273 if (CF_NONTRIVIAL_P (cf))
274 {
275 tree last_iteration = SUB_LAST_CONFLICT (subscript);
276 fprintf (outf, " last_conflict: ");
277 print_generic_stmt (outf, last_iteration, 0);
278 }
279
280 fprintf (outf, " (Subscript distance: ");
281 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
282 fprintf (outf, " )\n");
283 fprintf (outf, " )\n");
284 }
285
286 /* Print the classic direction vector DIRV to OUTF. */
287
288 void
289 print_direction_vector (FILE *outf,
290 lambda_vector dirv,
291 int length)
292 {
293 int eq;
294
295 for (eq = 0; eq < length; eq++)
296 {
297 enum data_dependence_direction dir = ((enum data_dependence_direction)
298 dirv[eq]);
299
300 switch (dir)
301 {
302 case dir_positive:
303 fprintf (outf, " +");
304 break;
305 case dir_negative:
306 fprintf (outf, " -");
307 break;
308 case dir_equal:
309 fprintf (outf, " =");
310 break;
311 case dir_positive_or_equal:
312 fprintf (outf, " +=");
313 break;
314 case dir_positive_or_negative:
315 fprintf (outf, " +-");
316 break;
317 case dir_negative_or_equal:
318 fprintf (outf, " -=");
319 break;
320 case dir_star:
321 fprintf (outf, " *");
322 break;
323 default:
324 fprintf (outf, "indep");
325 break;
326 }
327 }
328 fprintf (outf, "\n");
329 }
330
331 /* Print a vector of direction vectors. */
332
333 void
334 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
335 int length)
336 {
337 unsigned j;
338 lambda_vector v;
339
340 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
341 print_direction_vector (outf, v, length);
342 }
343
344 /* Print out a vector VEC of length N to OUTFILE. */
345
346 static inline void
347 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
348 {
349 int i;
350
351 for (i = 0; i < n; i++)
352 fprintf (outfile, "%3d ", vector[i]);
353 fprintf (outfile, "\n");
354 }
355
356 /* Print a vector of distance vectors. */
357
358 void
359 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
360 int length)
361 {
362 unsigned j;
363 lambda_vector v;
364
365 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
366 print_lambda_vector (outf, v, length);
367 }
368
369 /* Debug version. */
370
371 DEBUG_FUNCTION void
372 debug_data_dependence_relation (struct data_dependence_relation *ddr)
373 {
374 dump_data_dependence_relation (stderr, ddr);
375 }
376
377 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
378
379 void
380 dump_data_dependence_relation (FILE *outf,
381 struct data_dependence_relation *ddr)
382 {
383 struct data_reference *dra, *drb;
384
385 fprintf (outf, "(Data Dep: \n");
386
387 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
388 {
389 if (ddr)
390 {
391 dra = DDR_A (ddr);
392 drb = DDR_B (ddr);
393 if (dra)
394 dump_data_reference (outf, dra);
395 else
396 fprintf (outf, " (nil)\n");
397 if (drb)
398 dump_data_reference (outf, drb);
399 else
400 fprintf (outf, " (nil)\n");
401 }
402 fprintf (outf, " (don't know)\n)\n");
403 return;
404 }
405
406 dra = DDR_A (ddr);
407 drb = DDR_B (ddr);
408 dump_data_reference (outf, dra);
409 dump_data_reference (outf, drb);
410
411 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
412 fprintf (outf, " (no dependence)\n");
413
414 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
415 {
416 unsigned int i;
417 struct loop *loopi;
418
419 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
420 {
421 fprintf (outf, " access_fn_A: ");
422 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
423 fprintf (outf, " access_fn_B: ");
424 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
425 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
426 }
427
428 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
429 fprintf (outf, " loop nest: (");
430 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
431 fprintf (outf, "%d ", loopi->num);
432 fprintf (outf, ")\n");
433
434 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
435 {
436 fprintf (outf, " distance_vector: ");
437 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
438 DDR_NB_LOOPS (ddr));
439 }
440
441 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
442 {
443 fprintf (outf, " direction_vector: ");
444 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
445 DDR_NB_LOOPS (ddr));
446 }
447 }
448
449 fprintf (outf, ")\n");
450 }
451
452 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
453
454 void
455 dump_data_dependence_direction (FILE *file,
456 enum data_dependence_direction dir)
457 {
458 switch (dir)
459 {
460 case dir_positive:
461 fprintf (file, "+");
462 break;
463
464 case dir_negative:
465 fprintf (file, "-");
466 break;
467
468 case dir_equal:
469 fprintf (file, "=");
470 break;
471
472 case dir_positive_or_negative:
473 fprintf (file, "+-");
474 break;
475
476 case dir_positive_or_equal:
477 fprintf (file, "+=");
478 break;
479
480 case dir_negative_or_equal:
481 fprintf (file, "-=");
482 break;
483
484 case dir_star:
485 fprintf (file, "*");
486 break;
487
488 default:
489 break;
490 }
491 }
492
493 /* Dumps the distance and direction vectors in FILE. DDRS contains
494 the dependence relations, and VECT_SIZE is the size of the
495 dependence vectors, or in other words the number of loops in the
496 considered nest. */
497
498 void
499 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
500 {
501 unsigned int i, j;
502 struct data_dependence_relation *ddr;
503 lambda_vector v;
504
505 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
506 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
507 {
508 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
509 {
510 fprintf (file, "DISTANCE_V (");
511 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
512 fprintf (file, ")\n");
513 }
514
515 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
516 {
517 fprintf (file, "DIRECTION_V (");
518 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
519 fprintf (file, ")\n");
520 }
521 }
522
523 fprintf (file, "\n\n");
524 }
525
526 /* Dumps the data dependence relations DDRS in FILE. */
527
528 void
529 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
530 {
531 unsigned int i;
532 struct data_dependence_relation *ddr;
533
534 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
535 dump_data_dependence_relation (file, ddr);
536
537 fprintf (file, "\n\n");
538 }
539
540 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
541 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
542 constant of type ssizetype, and returns true. If we cannot do this
543 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
544 is returned. */
545
546 static bool
547 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
548 tree *var, tree *off)
549 {
550 tree var0, var1;
551 tree off0, off1;
552 enum tree_code ocode = code;
553
554 *var = NULL_TREE;
555 *off = NULL_TREE;
556
557 switch (code)
558 {
559 case INTEGER_CST:
560 *var = build_int_cst (type, 0);
561 *off = fold_convert (ssizetype, op0);
562 return true;
563
564 case POINTER_PLUS_EXPR:
565 ocode = PLUS_EXPR;
566 /* FALLTHROUGH */
567 case PLUS_EXPR:
568 case MINUS_EXPR:
569 split_constant_offset (op0, &var0, &off0);
570 split_constant_offset (op1, &var1, &off1);
571 *var = fold_build2 (code, type, var0, var1);
572 *off = size_binop (ocode, off0, off1);
573 return true;
574
575 case MULT_EXPR:
576 if (TREE_CODE (op1) != INTEGER_CST)
577 return false;
578
579 split_constant_offset (op0, &var0, &off0);
580 *var = fold_build2 (MULT_EXPR, type, var0, op1);
581 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
582 return true;
583
584 case ADDR_EXPR:
585 {
586 tree base, poffset;
587 HOST_WIDE_INT pbitsize, pbitpos;
588 enum machine_mode pmode;
589 int punsignedp, pvolatilep;
590
591 op0 = TREE_OPERAND (op0, 0);
592 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
593 &pmode, &punsignedp, &pvolatilep, false);
594
595 if (pbitpos % BITS_PER_UNIT != 0)
596 return false;
597 base = build_fold_addr_expr (base);
598 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
599
600 if (poffset)
601 {
602 split_constant_offset (poffset, &poffset, &off1);
603 off0 = size_binop (PLUS_EXPR, off0, off1);
604 if (POINTER_TYPE_P (TREE_TYPE (base)))
605 base = fold_build_pointer_plus (base, poffset);
606 else
607 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
608 fold_convert (TREE_TYPE (base), poffset));
609 }
610
611 var0 = fold_convert (type, base);
612
613 /* If variable length types are involved, punt, otherwise casts
614 might be converted into ARRAY_REFs in gimplify_conversion.
615 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
616 possibly no longer appears in current GIMPLE, might resurface.
617 This perhaps could run
618 if (CONVERT_EXPR_P (var0))
619 {
620 gimplify_conversion (&var0);
621 // Attempt to fill in any within var0 found ARRAY_REF's
622 // element size from corresponding op embedded ARRAY_REF,
623 // if unsuccessful, just punt.
624 } */
625 while (POINTER_TYPE_P (type))
626 type = TREE_TYPE (type);
627 if (int_size_in_bytes (type) < 0)
628 return false;
629
630 *var = var0;
631 *off = off0;
632 return true;
633 }
634
635 case SSA_NAME:
636 {
637 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
638 enum tree_code subcode;
639
640 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
641 return false;
642
643 var0 = gimple_assign_rhs1 (def_stmt);
644 subcode = gimple_assign_rhs_code (def_stmt);
645 var1 = gimple_assign_rhs2 (def_stmt);
646
647 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
648 }
649 CASE_CONVERT:
650 {
651 /* We must not introduce undefined overflow, and we must not change the value.
652 Hence we're okay if the inner type doesn't overflow to start with
653 (pointer or signed), the outer type also is an integer or pointer
654 and the outer precision is at least as large as the inner. */
655 tree itype = TREE_TYPE (op0);
656 if ((POINTER_TYPE_P (itype)
657 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
658 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
659 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
660 {
661 split_constant_offset (op0, &var0, off);
662 *var = fold_convert (type, var0);
663 return true;
664 }
665 return false;
666 }
667
668 default:
669 return false;
670 }
671 }
672
673 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
674 will be ssizetype. */
675
676 void
677 split_constant_offset (tree exp, tree *var, tree *off)
678 {
679 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
680 enum tree_code code;
681
682 *var = exp;
683 *off = ssize_int (0);
684 STRIP_NOPS (exp);
685
686 if (tree_is_chrec (exp)
687 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
688 return;
689
690 otype = TREE_TYPE (exp);
691 code = TREE_CODE (exp);
692 extract_ops_from_tree (exp, &code, &op0, &op1);
693 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
694 {
695 *var = fold_convert (type, e);
696 *off = o;
697 }
698 }
699
700 /* Returns the address ADDR of an object in a canonical shape (without nop
701 casts, and with type of pointer to the object). */
702
703 static tree
704 canonicalize_base_object_address (tree addr)
705 {
706 tree orig = addr;
707
708 STRIP_NOPS (addr);
709
710 /* The base address may be obtained by casting from integer, in that case
711 keep the cast. */
712 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
713 return orig;
714
715 if (TREE_CODE (addr) != ADDR_EXPR)
716 return addr;
717
718 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
719 }
720
721 /* Analyzes the behavior of the memory reference DR in the innermost loop or
722 basic block that contains it. Returns true if analysis succeed or false
723 otherwise. */
724
725 bool
726 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
727 {
728 gimple stmt = DR_STMT (dr);
729 struct loop *loop = loop_containing_stmt (stmt);
730 tree ref = DR_REF (dr);
731 HOST_WIDE_INT pbitsize, pbitpos;
732 tree base, poffset;
733 enum machine_mode pmode;
734 int punsignedp, pvolatilep;
735 affine_iv base_iv, offset_iv;
736 tree init, dinit, step;
737 bool in_loop = (loop && loop->num);
738
739 if (dump_file && (dump_flags & TDF_DETAILS))
740 fprintf (dump_file, "analyze_innermost: ");
741
742 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
743 &pmode, &punsignedp, &pvolatilep, false);
744 gcc_assert (base != NULL_TREE);
745
746 if (pbitpos % BITS_PER_UNIT != 0)
747 {
748 if (dump_file && (dump_flags & TDF_DETAILS))
749 fprintf (dump_file, "failed: bit offset alignment.\n");
750 return false;
751 }
752
753 if (TREE_CODE (base) == MEM_REF)
754 {
755 if (!integer_zerop (TREE_OPERAND (base, 1)))
756 {
757 if (!poffset)
758 {
759 double_int moff = mem_ref_offset (base);
760 poffset = double_int_to_tree (sizetype, moff);
761 }
762 else
763 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
764 }
765 base = TREE_OPERAND (base, 0);
766 }
767 else
768 base = build_fold_addr_expr (base);
769
770 if (in_loop)
771 {
772 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
773 false))
774 {
775 if (nest)
776 {
777 if (dump_file && (dump_flags & TDF_DETAILS))
778 fprintf (dump_file, "failed: evolution of base is not"
779 " affine.\n");
780 return false;
781 }
782 else
783 {
784 base_iv.base = base;
785 base_iv.step = ssize_int (0);
786 base_iv.no_overflow = true;
787 }
788 }
789 }
790 else
791 {
792 base_iv.base = base;
793 base_iv.step = ssize_int (0);
794 base_iv.no_overflow = true;
795 }
796
797 if (!poffset)
798 {
799 offset_iv.base = ssize_int (0);
800 offset_iv.step = ssize_int (0);
801 }
802 else
803 {
804 if (!in_loop)
805 {
806 offset_iv.base = poffset;
807 offset_iv.step = ssize_int (0);
808 }
809 else if (!simple_iv (loop, loop_containing_stmt (stmt),
810 poffset, &offset_iv, false))
811 {
812 if (nest)
813 {
814 if (dump_file && (dump_flags & TDF_DETAILS))
815 fprintf (dump_file, "failed: evolution of offset is not"
816 " affine.\n");
817 return false;
818 }
819 else
820 {
821 offset_iv.base = poffset;
822 offset_iv.step = ssize_int (0);
823 }
824 }
825 }
826
827 init = ssize_int (pbitpos / BITS_PER_UNIT);
828 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
829 init = size_binop (PLUS_EXPR, init, dinit);
830 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
831 init = size_binop (PLUS_EXPR, init, dinit);
832
833 step = size_binop (PLUS_EXPR,
834 fold_convert (ssizetype, base_iv.step),
835 fold_convert (ssizetype, offset_iv.step));
836
837 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
838
839 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
840 DR_INIT (dr) = init;
841 DR_STEP (dr) = step;
842
843 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
844
845 if (dump_file && (dump_flags & TDF_DETAILS))
846 fprintf (dump_file, "success.\n");
847
848 return true;
849 }
850
851 /* Determines the base object and the list of indices of memory reference
852 DR, analyzed in LOOP and instantiated in loop nest NEST. */
853
854 static void
855 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
856 {
857 VEC (tree, heap) *access_fns = NULL;
858 tree ref, *aref, op;
859 tree base, off, access_fn;
860 basic_block before_loop;
861
862 /* If analyzing a basic-block there are no indices to analyze
863 and thus no access functions. */
864 if (!nest)
865 {
866 DR_BASE_OBJECT (dr) = DR_REF (dr);
867 DR_ACCESS_FNS (dr) = NULL;
868 return;
869 }
870
871 ref = unshare_expr (DR_REF (dr));
872 before_loop = block_before_loop (nest);
873
874 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
875 into a two element array with a constant index. The base is
876 then just the immediate underlying object. */
877 if (TREE_CODE (ref) == REALPART_EXPR)
878 {
879 ref = TREE_OPERAND (ref, 0);
880 VEC_safe_push (tree, heap, access_fns, integer_zero_node);
881 }
882 else if (TREE_CODE (ref) == IMAGPART_EXPR)
883 {
884 ref = TREE_OPERAND (ref, 0);
885 VEC_safe_push (tree, heap, access_fns, integer_one_node);
886 }
887
888 /* Analyze access functions of dimensions we know to be independent. */
889 aref = &ref;
890 while (handled_component_p (*aref))
891 {
892 if (TREE_CODE (*aref) == ARRAY_REF)
893 {
894 op = TREE_OPERAND (*aref, 1);
895 access_fn = analyze_scalar_evolution (loop, op);
896 access_fn = instantiate_scev (before_loop, loop, access_fn);
897 VEC_safe_push (tree, heap, access_fns, access_fn);
898 /* For ARRAY_REFs the base is the reference with the index replaced
899 by zero if we can not strip it as the outermost component. */
900 if (*aref == ref)
901 {
902 *aref = TREE_OPERAND (*aref, 0);
903 continue;
904 }
905 else
906 TREE_OPERAND (*aref, 1) = build_int_cst (TREE_TYPE (op), 0);
907 }
908
909 aref = &TREE_OPERAND (*aref, 0);
910 }
911
912 /* If the address operand of a MEM_REF base has an evolution in the
913 analyzed nest, add it as an additional independent access-function. */
914 if (TREE_CODE (*aref) == MEM_REF)
915 {
916 op = TREE_OPERAND (*aref, 0);
917 access_fn = analyze_scalar_evolution (loop, op);
918 access_fn = instantiate_scev (before_loop, loop, access_fn);
919 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
920 {
921 tree orig_type;
922 base = initial_condition (access_fn);
923 orig_type = TREE_TYPE (base);
924 STRIP_USELESS_TYPE_CONVERSION (base);
925 split_constant_offset (base, &base, &off);
926 /* Fold the MEM_REF offset into the evolutions initial
927 value to make more bases comparable. */
928 if (!integer_zerop (TREE_OPERAND (*aref, 1)))
929 {
930 off = size_binop (PLUS_EXPR, off,
931 fold_convert (ssizetype,
932 TREE_OPERAND (*aref, 1)));
933 TREE_OPERAND (*aref, 1)
934 = build_int_cst (TREE_TYPE (TREE_OPERAND (*aref, 1)), 0);
935 }
936 access_fn = chrec_replace_initial_condition
937 (access_fn, fold_convert (orig_type, off));
938 *aref = fold_build2_loc (EXPR_LOCATION (*aref),
939 MEM_REF, TREE_TYPE (*aref),
940 base, TREE_OPERAND (*aref, 1));
941 VEC_safe_push (tree, heap, access_fns, access_fn);
942 }
943 }
944
945 DR_BASE_OBJECT (dr) = ref;
946 DR_ACCESS_FNS (dr) = access_fns;
947 }
948
949 /* Extracts the alias analysis information from the memory reference DR. */
950
951 static void
952 dr_analyze_alias (struct data_reference *dr)
953 {
954 tree ref = DR_REF (dr);
955 tree base = get_base_address (ref), addr;
956
957 if (INDIRECT_REF_P (base)
958 || TREE_CODE (base) == MEM_REF)
959 {
960 addr = TREE_OPERAND (base, 0);
961 if (TREE_CODE (addr) == SSA_NAME)
962 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
963 }
964 }
965
966 /* Frees data reference DR. */
967
968 void
969 free_data_ref (data_reference_p dr)
970 {
971 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
972 free (dr);
973 }
974
975 /* Analyzes memory reference MEMREF accessed in STMT. The reference
976 is read if IS_READ is true, write otherwise. Returns the
977 data_reference description of MEMREF. NEST is the outermost loop
978 in which the reference should be instantiated, LOOP is the loop in
979 which the data reference should be analyzed. */
980
981 struct data_reference *
982 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
983 bool is_read)
984 {
985 struct data_reference *dr;
986
987 if (dump_file && (dump_flags & TDF_DETAILS))
988 {
989 fprintf (dump_file, "Creating dr for ");
990 print_generic_expr (dump_file, memref, TDF_SLIM);
991 fprintf (dump_file, "\n");
992 }
993
994 dr = XCNEW (struct data_reference);
995 DR_STMT (dr) = stmt;
996 DR_REF (dr) = memref;
997 DR_IS_READ (dr) = is_read;
998
999 dr_analyze_innermost (dr, nest);
1000 dr_analyze_indices (dr, nest, loop);
1001 dr_analyze_alias (dr);
1002
1003 if (dump_file && (dump_flags & TDF_DETAILS))
1004 {
1005 unsigned i;
1006 fprintf (dump_file, "\tbase_address: ");
1007 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1008 fprintf (dump_file, "\n\toffset from base address: ");
1009 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1010 fprintf (dump_file, "\n\tconstant offset from base address: ");
1011 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1012 fprintf (dump_file, "\n\tstep: ");
1013 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1014 fprintf (dump_file, "\n\taligned to: ");
1015 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1016 fprintf (dump_file, "\n\tbase_object: ");
1017 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1018 fprintf (dump_file, "\n");
1019 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1020 {
1021 fprintf (dump_file, "\tAccess function %d: ", i);
1022 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1023 }
1024 }
1025
1026 return dr;
1027 }
1028
1029 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1030 expressions. */
1031 static bool
1032 dr_equal_offsets_p1 (tree offset1, tree offset2)
1033 {
1034 bool res;
1035
1036 STRIP_NOPS (offset1);
1037 STRIP_NOPS (offset2);
1038
1039 if (offset1 == offset2)
1040 return true;
1041
1042 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1043 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1044 return false;
1045
1046 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1047 TREE_OPERAND (offset2, 0));
1048
1049 if (!res || !BINARY_CLASS_P (offset1))
1050 return res;
1051
1052 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1053 TREE_OPERAND (offset2, 1));
1054
1055 return res;
1056 }
1057
1058 /* Check if DRA and DRB have equal offsets. */
1059 bool
1060 dr_equal_offsets_p (struct data_reference *dra,
1061 struct data_reference *drb)
1062 {
1063 tree offset1, offset2;
1064
1065 offset1 = DR_OFFSET (dra);
1066 offset2 = DR_OFFSET (drb);
1067
1068 return dr_equal_offsets_p1 (offset1, offset2);
1069 }
1070
1071 /* Returns true if FNA == FNB. */
1072
1073 static bool
1074 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1075 {
1076 unsigned i, n = VEC_length (tree, fna);
1077
1078 if (n != VEC_length (tree, fnb))
1079 return false;
1080
1081 for (i = 0; i < n; i++)
1082 if (!operand_equal_p (VEC_index (tree, fna, i),
1083 VEC_index (tree, fnb, i), 0))
1084 return false;
1085
1086 return true;
1087 }
1088
1089 /* If all the functions in CF are the same, returns one of them,
1090 otherwise returns NULL. */
1091
1092 static affine_fn
1093 common_affine_function (conflict_function *cf)
1094 {
1095 unsigned i;
1096 affine_fn comm;
1097
1098 if (!CF_NONTRIVIAL_P (cf))
1099 return NULL;
1100
1101 comm = cf->fns[0];
1102
1103 for (i = 1; i < cf->n; i++)
1104 if (!affine_function_equal_p (comm, cf->fns[i]))
1105 return NULL;
1106
1107 return comm;
1108 }
1109
1110 /* Returns the base of the affine function FN. */
1111
1112 static tree
1113 affine_function_base (affine_fn fn)
1114 {
1115 return VEC_index (tree, fn, 0);
1116 }
1117
1118 /* Returns true if FN is a constant. */
1119
1120 static bool
1121 affine_function_constant_p (affine_fn fn)
1122 {
1123 unsigned i;
1124 tree coef;
1125
1126 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1127 if (!integer_zerop (coef))
1128 return false;
1129
1130 return true;
1131 }
1132
1133 /* Returns true if FN is the zero constant function. */
1134
1135 static bool
1136 affine_function_zero_p (affine_fn fn)
1137 {
1138 return (integer_zerop (affine_function_base (fn))
1139 && affine_function_constant_p (fn));
1140 }
1141
1142 /* Returns a signed integer type with the largest precision from TA
1143 and TB. */
1144
1145 static tree
1146 signed_type_for_types (tree ta, tree tb)
1147 {
1148 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1149 return signed_type_for (ta);
1150 else
1151 return signed_type_for (tb);
1152 }
1153
1154 /* Applies operation OP on affine functions FNA and FNB, and returns the
1155 result. */
1156
1157 static affine_fn
1158 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1159 {
1160 unsigned i, n, m;
1161 affine_fn ret;
1162 tree coef;
1163
1164 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1165 {
1166 n = VEC_length (tree, fna);
1167 m = VEC_length (tree, fnb);
1168 }
1169 else
1170 {
1171 n = VEC_length (tree, fnb);
1172 m = VEC_length (tree, fna);
1173 }
1174
1175 ret = VEC_alloc (tree, heap, m);
1176 for (i = 0; i < n; i++)
1177 {
1178 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1179 TREE_TYPE (VEC_index (tree, fnb, i)));
1180
1181 VEC_quick_push (tree, ret,
1182 fold_build2 (op, type,
1183 VEC_index (tree, fna, i),
1184 VEC_index (tree, fnb, i)));
1185 }
1186
1187 for (; VEC_iterate (tree, fna, i, coef); i++)
1188 VEC_quick_push (tree, ret,
1189 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1190 coef, integer_zero_node));
1191 for (; VEC_iterate (tree, fnb, i, coef); i++)
1192 VEC_quick_push (tree, ret,
1193 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1194 integer_zero_node, coef));
1195
1196 return ret;
1197 }
1198
1199 /* Returns the sum of affine functions FNA and FNB. */
1200
1201 static affine_fn
1202 affine_fn_plus (affine_fn fna, affine_fn fnb)
1203 {
1204 return affine_fn_op (PLUS_EXPR, fna, fnb);
1205 }
1206
1207 /* Returns the difference of affine functions FNA and FNB. */
1208
1209 static affine_fn
1210 affine_fn_minus (affine_fn fna, affine_fn fnb)
1211 {
1212 return affine_fn_op (MINUS_EXPR, fna, fnb);
1213 }
1214
1215 /* Frees affine function FN. */
1216
1217 static void
1218 affine_fn_free (affine_fn fn)
1219 {
1220 VEC_free (tree, heap, fn);
1221 }
1222
1223 /* Determine for each subscript in the data dependence relation DDR
1224 the distance. */
1225
1226 static void
1227 compute_subscript_distance (struct data_dependence_relation *ddr)
1228 {
1229 conflict_function *cf_a, *cf_b;
1230 affine_fn fn_a, fn_b, diff;
1231
1232 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1233 {
1234 unsigned int i;
1235
1236 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1237 {
1238 struct subscript *subscript;
1239
1240 subscript = DDR_SUBSCRIPT (ddr, i);
1241 cf_a = SUB_CONFLICTS_IN_A (subscript);
1242 cf_b = SUB_CONFLICTS_IN_B (subscript);
1243
1244 fn_a = common_affine_function (cf_a);
1245 fn_b = common_affine_function (cf_b);
1246 if (!fn_a || !fn_b)
1247 {
1248 SUB_DISTANCE (subscript) = chrec_dont_know;
1249 return;
1250 }
1251 diff = affine_fn_minus (fn_a, fn_b);
1252
1253 if (affine_function_constant_p (diff))
1254 SUB_DISTANCE (subscript) = affine_function_base (diff);
1255 else
1256 SUB_DISTANCE (subscript) = chrec_dont_know;
1257
1258 affine_fn_free (diff);
1259 }
1260 }
1261 }
1262
1263 /* Returns the conflict function for "unknown". */
1264
1265 static conflict_function *
1266 conflict_fn_not_known (void)
1267 {
1268 conflict_function *fn = XCNEW (conflict_function);
1269 fn->n = NOT_KNOWN;
1270
1271 return fn;
1272 }
1273
1274 /* Returns the conflict function for "independent". */
1275
1276 static conflict_function *
1277 conflict_fn_no_dependence (void)
1278 {
1279 conflict_function *fn = XCNEW (conflict_function);
1280 fn->n = NO_DEPENDENCE;
1281
1282 return fn;
1283 }
1284
1285 /* Returns true if the address of OBJ is invariant in LOOP. */
1286
1287 static bool
1288 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1289 {
1290 while (handled_component_p (obj))
1291 {
1292 if (TREE_CODE (obj) == ARRAY_REF)
1293 {
1294 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1295 need to check the stride and the lower bound of the reference. */
1296 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1297 loop->num)
1298 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1299 loop->num))
1300 return false;
1301 }
1302 else if (TREE_CODE (obj) == COMPONENT_REF)
1303 {
1304 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1305 loop->num))
1306 return false;
1307 }
1308 obj = TREE_OPERAND (obj, 0);
1309 }
1310
1311 if (!INDIRECT_REF_P (obj)
1312 && TREE_CODE (obj) != MEM_REF)
1313 return true;
1314
1315 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1316 loop->num);
1317 }
1318
1319 /* Returns false if we can prove that data references A and B do not alias,
1320 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1321 considered. */
1322
1323 bool
1324 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1325 bool loop_nest)
1326 {
1327 tree addr_a = DR_BASE_OBJECT (a);
1328 tree addr_b = DR_BASE_OBJECT (b);
1329
1330 /* If we are not processing a loop nest but scalar code we
1331 do not need to care about possible cross-iteration dependences
1332 and thus can process the full original reference. Do so,
1333 similar to how loop invariant motion applies extra offset-based
1334 disambiguation. */
1335 if (!loop_nest)
1336 {
1337 aff_tree off1, off2;
1338 double_int size1, size2;
1339 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1340 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1341 aff_combination_scale (&off1, double_int_minus_one);
1342 aff_combination_add (&off2, &off1);
1343 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1344 return false;
1345 }
1346
1347 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1348 return refs_output_dependent_p (addr_a, addr_b);
1349 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1350 return refs_anti_dependent_p (addr_a, addr_b);
1351 return refs_may_alias_p (addr_a, addr_b);
1352 }
1353
1354 /* Initialize a data dependence relation between data accesses A and
1355 B. NB_LOOPS is the number of loops surrounding the references: the
1356 size of the classic distance/direction vectors. */
1357
1358 struct data_dependence_relation *
1359 initialize_data_dependence_relation (struct data_reference *a,
1360 struct data_reference *b,
1361 VEC (loop_p, heap) *loop_nest)
1362 {
1363 struct data_dependence_relation *res;
1364 unsigned int i;
1365
1366 res = XNEW (struct data_dependence_relation);
1367 DDR_A (res) = a;
1368 DDR_B (res) = b;
1369 DDR_LOOP_NEST (res) = NULL;
1370 DDR_REVERSED_P (res) = false;
1371 DDR_SUBSCRIPTS (res) = NULL;
1372 DDR_DIR_VECTS (res) = NULL;
1373 DDR_DIST_VECTS (res) = NULL;
1374
1375 if (a == NULL || b == NULL)
1376 {
1377 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1378 return res;
1379 }
1380
1381 /* If the data references do not alias, then they are independent. */
1382 if (!dr_may_alias_p (a, b, loop_nest != NULL))
1383 {
1384 DDR_ARE_DEPENDENT (res) = chrec_known;
1385 return res;
1386 }
1387
1388 /* When the references are exactly the same, don't spend time doing
1389 the data dependence tests, just initialize the ddr and return. */
1390 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1391 {
1392 DDR_AFFINE_P (res) = true;
1393 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1394 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1395 DDR_LOOP_NEST (res) = loop_nest;
1396 DDR_INNER_LOOP (res) = 0;
1397 DDR_SELF_REFERENCE (res) = true;
1398 compute_self_dependence (res);
1399 return res;
1400 }
1401
1402 /* If the references do not access the same object, we do not know
1403 whether they alias or not. */
1404 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1405 {
1406 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1407 return res;
1408 }
1409
1410 /* If the base of the object is not invariant in the loop nest, we cannot
1411 analyze it. TODO -- in fact, it would suffice to record that there may
1412 be arbitrary dependences in the loops where the base object varies. */
1413 if (loop_nest
1414 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1415 DR_BASE_OBJECT (a)))
1416 {
1417 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1418 return res;
1419 }
1420
1421 /* If the number of dimensions of the access to not agree we can have
1422 a pointer access to a component of the array element type and an
1423 array access while the base-objects are still the same. Punt. */
1424 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1425 {
1426 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1427 return res;
1428 }
1429
1430 DDR_AFFINE_P (res) = true;
1431 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1432 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1433 DDR_LOOP_NEST (res) = loop_nest;
1434 DDR_INNER_LOOP (res) = 0;
1435 DDR_SELF_REFERENCE (res) = false;
1436
1437 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1438 {
1439 struct subscript *subscript;
1440
1441 subscript = XNEW (struct subscript);
1442 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1443 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1444 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1445 SUB_DISTANCE (subscript) = chrec_dont_know;
1446 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1447 }
1448
1449 return res;
1450 }
1451
1452 /* Frees memory used by the conflict function F. */
1453
1454 static void
1455 free_conflict_function (conflict_function *f)
1456 {
1457 unsigned i;
1458
1459 if (CF_NONTRIVIAL_P (f))
1460 {
1461 for (i = 0; i < f->n; i++)
1462 affine_fn_free (f->fns[i]);
1463 }
1464 free (f);
1465 }
1466
1467 /* Frees memory used by SUBSCRIPTS. */
1468
1469 static void
1470 free_subscripts (VEC (subscript_p, heap) *subscripts)
1471 {
1472 unsigned i;
1473 subscript_p s;
1474
1475 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1476 {
1477 free_conflict_function (s->conflicting_iterations_in_a);
1478 free_conflict_function (s->conflicting_iterations_in_b);
1479 free (s);
1480 }
1481 VEC_free (subscript_p, heap, subscripts);
1482 }
1483
1484 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1485 description. */
1486
1487 static inline void
1488 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1489 tree chrec)
1490 {
1491 if (dump_file && (dump_flags & TDF_DETAILS))
1492 {
1493 fprintf (dump_file, "(dependence classified: ");
1494 print_generic_expr (dump_file, chrec, 0);
1495 fprintf (dump_file, ")\n");
1496 }
1497
1498 DDR_ARE_DEPENDENT (ddr) = chrec;
1499 free_subscripts (DDR_SUBSCRIPTS (ddr));
1500 DDR_SUBSCRIPTS (ddr) = NULL;
1501 }
1502
1503 /* The dependence relation DDR cannot be represented by a distance
1504 vector. */
1505
1506 static inline void
1507 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1508 {
1509 if (dump_file && (dump_flags & TDF_DETAILS))
1510 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1511
1512 DDR_AFFINE_P (ddr) = false;
1513 }
1514
1515 \f
1516
1517 /* This section contains the classic Banerjee tests. */
1518
1519 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1520 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1521
1522 static inline bool
1523 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1524 {
1525 return (evolution_function_is_constant_p (chrec_a)
1526 && evolution_function_is_constant_p (chrec_b));
1527 }
1528
1529 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1530 variable, i.e., if the SIV (Single Index Variable) test is true. */
1531
1532 static bool
1533 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1534 {
1535 if ((evolution_function_is_constant_p (chrec_a)
1536 && evolution_function_is_univariate_p (chrec_b))
1537 || (evolution_function_is_constant_p (chrec_b)
1538 && evolution_function_is_univariate_p (chrec_a)))
1539 return true;
1540
1541 if (evolution_function_is_univariate_p (chrec_a)
1542 && evolution_function_is_univariate_p (chrec_b))
1543 {
1544 switch (TREE_CODE (chrec_a))
1545 {
1546 case POLYNOMIAL_CHREC:
1547 switch (TREE_CODE (chrec_b))
1548 {
1549 case POLYNOMIAL_CHREC:
1550 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1551 return false;
1552
1553 default:
1554 return true;
1555 }
1556
1557 default:
1558 return true;
1559 }
1560 }
1561
1562 return false;
1563 }
1564
1565 /* Creates a conflict function with N dimensions. The affine functions
1566 in each dimension follow. */
1567
1568 static conflict_function *
1569 conflict_fn (unsigned n, ...)
1570 {
1571 unsigned i;
1572 conflict_function *ret = XCNEW (conflict_function);
1573 va_list ap;
1574
1575 gcc_assert (0 < n && n <= MAX_DIM);
1576 va_start(ap, n);
1577
1578 ret->n = n;
1579 for (i = 0; i < n; i++)
1580 ret->fns[i] = va_arg (ap, affine_fn);
1581 va_end(ap);
1582
1583 return ret;
1584 }
1585
1586 /* Returns constant affine function with value CST. */
1587
1588 static affine_fn
1589 affine_fn_cst (tree cst)
1590 {
1591 affine_fn fn = VEC_alloc (tree, heap, 1);
1592 VEC_quick_push (tree, fn, cst);
1593 return fn;
1594 }
1595
1596 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1597
1598 static affine_fn
1599 affine_fn_univar (tree cst, unsigned dim, tree coef)
1600 {
1601 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1602 unsigned i;
1603
1604 gcc_assert (dim > 0);
1605 VEC_quick_push (tree, fn, cst);
1606 for (i = 1; i < dim; i++)
1607 VEC_quick_push (tree, fn, integer_zero_node);
1608 VEC_quick_push (tree, fn, coef);
1609 return fn;
1610 }
1611
1612 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1613 *OVERLAPS_B are initialized to the functions that describe the
1614 relation between the elements accessed twice by CHREC_A and
1615 CHREC_B. For k >= 0, the following property is verified:
1616
1617 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1618
1619 static void
1620 analyze_ziv_subscript (tree chrec_a,
1621 tree chrec_b,
1622 conflict_function **overlaps_a,
1623 conflict_function **overlaps_b,
1624 tree *last_conflicts)
1625 {
1626 tree type, difference;
1627 dependence_stats.num_ziv++;
1628
1629 if (dump_file && (dump_flags & TDF_DETAILS))
1630 fprintf (dump_file, "(analyze_ziv_subscript \n");
1631
1632 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1633 chrec_a = chrec_convert (type, chrec_a, NULL);
1634 chrec_b = chrec_convert (type, chrec_b, NULL);
1635 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1636
1637 switch (TREE_CODE (difference))
1638 {
1639 case INTEGER_CST:
1640 if (integer_zerop (difference))
1641 {
1642 /* The difference is equal to zero: the accessed index
1643 overlaps for each iteration in the loop. */
1644 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1645 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1646 *last_conflicts = chrec_dont_know;
1647 dependence_stats.num_ziv_dependent++;
1648 }
1649 else
1650 {
1651 /* The accesses do not overlap. */
1652 *overlaps_a = conflict_fn_no_dependence ();
1653 *overlaps_b = conflict_fn_no_dependence ();
1654 *last_conflicts = integer_zero_node;
1655 dependence_stats.num_ziv_independent++;
1656 }
1657 break;
1658
1659 default:
1660 /* We're not sure whether the indexes overlap. For the moment,
1661 conservatively answer "don't know". */
1662 if (dump_file && (dump_flags & TDF_DETAILS))
1663 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1664
1665 *overlaps_a = conflict_fn_not_known ();
1666 *overlaps_b = conflict_fn_not_known ();
1667 *last_conflicts = chrec_dont_know;
1668 dependence_stats.num_ziv_unimplemented++;
1669 break;
1670 }
1671
1672 if (dump_file && (dump_flags & TDF_DETAILS))
1673 fprintf (dump_file, ")\n");
1674 }
1675
1676 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1677 and only if it fits to the int type. If this is not the case, or the
1678 bound on the number of iterations of LOOP could not be derived, returns
1679 chrec_dont_know. */
1680
1681 static tree
1682 max_stmt_executions_tree (struct loop *loop)
1683 {
1684 double_int nit;
1685
1686 if (!max_stmt_executions (loop, true, &nit))
1687 return chrec_dont_know;
1688
1689 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1690 return chrec_dont_know;
1691
1692 return double_int_to_tree (unsigned_type_node, nit);
1693 }
1694
1695 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1696 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1697 *OVERLAPS_B are initialized to the functions that describe the
1698 relation between the elements accessed twice by CHREC_A and
1699 CHREC_B. For k >= 0, the following property is verified:
1700
1701 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1702
1703 static void
1704 analyze_siv_subscript_cst_affine (tree chrec_a,
1705 tree chrec_b,
1706 conflict_function **overlaps_a,
1707 conflict_function **overlaps_b,
1708 tree *last_conflicts)
1709 {
1710 bool value0, value1, value2;
1711 tree type, difference, tmp;
1712
1713 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1714 chrec_a = chrec_convert (type, chrec_a, NULL);
1715 chrec_b = chrec_convert (type, chrec_b, NULL);
1716 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1717
1718 if (!chrec_is_positive (initial_condition (difference), &value0))
1719 {
1720 if (dump_file && (dump_flags & TDF_DETAILS))
1721 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1722
1723 dependence_stats.num_siv_unimplemented++;
1724 *overlaps_a = conflict_fn_not_known ();
1725 *overlaps_b = conflict_fn_not_known ();
1726 *last_conflicts = chrec_dont_know;
1727 return;
1728 }
1729 else
1730 {
1731 if (value0 == false)
1732 {
1733 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1734 {
1735 if (dump_file && (dump_flags & TDF_DETAILS))
1736 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1737
1738 *overlaps_a = conflict_fn_not_known ();
1739 *overlaps_b = conflict_fn_not_known ();
1740 *last_conflicts = chrec_dont_know;
1741 dependence_stats.num_siv_unimplemented++;
1742 return;
1743 }
1744 else
1745 {
1746 if (value1 == true)
1747 {
1748 /* Example:
1749 chrec_a = 12
1750 chrec_b = {10, +, 1}
1751 */
1752
1753 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1754 {
1755 HOST_WIDE_INT numiter;
1756 struct loop *loop = get_chrec_loop (chrec_b);
1757
1758 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1759 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1760 fold_build1 (ABS_EXPR, type, difference),
1761 CHREC_RIGHT (chrec_b));
1762 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1763 *last_conflicts = integer_one_node;
1764
1765
1766 /* Perform weak-zero siv test to see if overlap is
1767 outside the loop bounds. */
1768 numiter = max_stmt_executions_int (loop, true);
1769
1770 if (numiter >= 0
1771 && compare_tree_int (tmp, numiter) > 0)
1772 {
1773 free_conflict_function (*overlaps_a);
1774 free_conflict_function (*overlaps_b);
1775 *overlaps_a = conflict_fn_no_dependence ();
1776 *overlaps_b = conflict_fn_no_dependence ();
1777 *last_conflicts = integer_zero_node;
1778 dependence_stats.num_siv_independent++;
1779 return;
1780 }
1781 dependence_stats.num_siv_dependent++;
1782 return;
1783 }
1784
1785 /* When the step does not divide the difference, there are
1786 no overlaps. */
1787 else
1788 {
1789 *overlaps_a = conflict_fn_no_dependence ();
1790 *overlaps_b = conflict_fn_no_dependence ();
1791 *last_conflicts = integer_zero_node;
1792 dependence_stats.num_siv_independent++;
1793 return;
1794 }
1795 }
1796
1797 else
1798 {
1799 /* Example:
1800 chrec_a = 12
1801 chrec_b = {10, +, -1}
1802
1803 In this case, chrec_a will not overlap with chrec_b. */
1804 *overlaps_a = conflict_fn_no_dependence ();
1805 *overlaps_b = conflict_fn_no_dependence ();
1806 *last_conflicts = integer_zero_node;
1807 dependence_stats.num_siv_independent++;
1808 return;
1809 }
1810 }
1811 }
1812 else
1813 {
1814 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1815 {
1816 if (dump_file && (dump_flags & TDF_DETAILS))
1817 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1818
1819 *overlaps_a = conflict_fn_not_known ();
1820 *overlaps_b = conflict_fn_not_known ();
1821 *last_conflicts = chrec_dont_know;
1822 dependence_stats.num_siv_unimplemented++;
1823 return;
1824 }
1825 else
1826 {
1827 if (value2 == false)
1828 {
1829 /* Example:
1830 chrec_a = 3
1831 chrec_b = {10, +, -1}
1832 */
1833 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1834 {
1835 HOST_WIDE_INT numiter;
1836 struct loop *loop = get_chrec_loop (chrec_b);
1837
1838 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1839 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1840 CHREC_RIGHT (chrec_b));
1841 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1842 *last_conflicts = integer_one_node;
1843
1844 /* Perform weak-zero siv test to see if overlap is
1845 outside the loop bounds. */
1846 numiter = max_stmt_executions_int (loop, true);
1847
1848 if (numiter >= 0
1849 && compare_tree_int (tmp, numiter) > 0)
1850 {
1851 free_conflict_function (*overlaps_a);
1852 free_conflict_function (*overlaps_b);
1853 *overlaps_a = conflict_fn_no_dependence ();
1854 *overlaps_b = conflict_fn_no_dependence ();
1855 *last_conflicts = integer_zero_node;
1856 dependence_stats.num_siv_independent++;
1857 return;
1858 }
1859 dependence_stats.num_siv_dependent++;
1860 return;
1861 }
1862
1863 /* When the step does not divide the difference, there
1864 are no overlaps. */
1865 else
1866 {
1867 *overlaps_a = conflict_fn_no_dependence ();
1868 *overlaps_b = conflict_fn_no_dependence ();
1869 *last_conflicts = integer_zero_node;
1870 dependence_stats.num_siv_independent++;
1871 return;
1872 }
1873 }
1874 else
1875 {
1876 /* Example:
1877 chrec_a = 3
1878 chrec_b = {4, +, 1}
1879
1880 In this case, chrec_a will not overlap with chrec_b. */
1881 *overlaps_a = conflict_fn_no_dependence ();
1882 *overlaps_b = conflict_fn_no_dependence ();
1883 *last_conflicts = integer_zero_node;
1884 dependence_stats.num_siv_independent++;
1885 return;
1886 }
1887 }
1888 }
1889 }
1890 }
1891
1892 /* Helper recursive function for initializing the matrix A. Returns
1893 the initial value of CHREC. */
1894
1895 static tree
1896 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1897 {
1898 gcc_assert (chrec);
1899
1900 switch (TREE_CODE (chrec))
1901 {
1902 case POLYNOMIAL_CHREC:
1903 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1904
1905 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1906 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1907
1908 case PLUS_EXPR:
1909 case MULT_EXPR:
1910 case MINUS_EXPR:
1911 {
1912 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1913 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1914
1915 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1916 }
1917
1918 case NOP_EXPR:
1919 {
1920 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1921 return chrec_convert (chrec_type (chrec), op, NULL);
1922 }
1923
1924 case BIT_NOT_EXPR:
1925 {
1926 /* Handle ~X as -1 - X. */
1927 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1928 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1929 build_int_cst (TREE_TYPE (chrec), -1), op);
1930 }
1931
1932 case INTEGER_CST:
1933 return chrec;
1934
1935 default:
1936 gcc_unreachable ();
1937 return NULL_TREE;
1938 }
1939 }
1940
1941 #define FLOOR_DIV(x,y) ((x) / (y))
1942
1943 /* Solves the special case of the Diophantine equation:
1944 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1945
1946 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1947 number of iterations that loops X and Y run. The overlaps will be
1948 constructed as evolutions in dimension DIM. */
1949
1950 static void
1951 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1952 affine_fn *overlaps_a,
1953 affine_fn *overlaps_b,
1954 tree *last_conflicts, int dim)
1955 {
1956 if (((step_a > 0 && step_b > 0)
1957 || (step_a < 0 && step_b < 0)))
1958 {
1959 int step_overlaps_a, step_overlaps_b;
1960 int gcd_steps_a_b, last_conflict, tau2;
1961
1962 gcd_steps_a_b = gcd (step_a, step_b);
1963 step_overlaps_a = step_b / gcd_steps_a_b;
1964 step_overlaps_b = step_a / gcd_steps_a_b;
1965
1966 if (niter > 0)
1967 {
1968 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1969 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1970 last_conflict = tau2;
1971 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1972 }
1973 else
1974 *last_conflicts = chrec_dont_know;
1975
1976 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1977 build_int_cst (NULL_TREE,
1978 step_overlaps_a));
1979 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1980 build_int_cst (NULL_TREE,
1981 step_overlaps_b));
1982 }
1983
1984 else
1985 {
1986 *overlaps_a = affine_fn_cst (integer_zero_node);
1987 *overlaps_b = affine_fn_cst (integer_zero_node);
1988 *last_conflicts = integer_zero_node;
1989 }
1990 }
1991
1992 /* Solves the special case of a Diophantine equation where CHREC_A is
1993 an affine bivariate function, and CHREC_B is an affine univariate
1994 function. For example,
1995
1996 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1997
1998 has the following overlapping functions:
1999
2000 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2001 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2002 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2003
2004 FORNOW: This is a specialized implementation for a case occurring in
2005 a common benchmark. Implement the general algorithm. */
2006
2007 static void
2008 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2009 conflict_function **overlaps_a,
2010 conflict_function **overlaps_b,
2011 tree *last_conflicts)
2012 {
2013 bool xz_p, yz_p, xyz_p;
2014 int step_x, step_y, step_z;
2015 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2016 affine_fn overlaps_a_xz, overlaps_b_xz;
2017 affine_fn overlaps_a_yz, overlaps_b_yz;
2018 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2019 affine_fn ova1, ova2, ovb;
2020 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2021
2022 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2023 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2024 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2025
2026 niter_x =
2027 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
2028 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2029 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2030
2031 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2032 {
2033 if (dump_file && (dump_flags & TDF_DETAILS))
2034 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2035
2036 *overlaps_a = conflict_fn_not_known ();
2037 *overlaps_b = conflict_fn_not_known ();
2038 *last_conflicts = chrec_dont_know;
2039 return;
2040 }
2041
2042 niter = MIN (niter_x, niter_z);
2043 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2044 &overlaps_a_xz,
2045 &overlaps_b_xz,
2046 &last_conflicts_xz, 1);
2047 niter = MIN (niter_y, niter_z);
2048 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2049 &overlaps_a_yz,
2050 &overlaps_b_yz,
2051 &last_conflicts_yz, 2);
2052 niter = MIN (niter_x, niter_z);
2053 niter = MIN (niter_y, niter);
2054 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2055 &overlaps_a_xyz,
2056 &overlaps_b_xyz,
2057 &last_conflicts_xyz, 3);
2058
2059 xz_p = !integer_zerop (last_conflicts_xz);
2060 yz_p = !integer_zerop (last_conflicts_yz);
2061 xyz_p = !integer_zerop (last_conflicts_xyz);
2062
2063 if (xz_p || yz_p || xyz_p)
2064 {
2065 ova1 = affine_fn_cst (integer_zero_node);
2066 ova2 = affine_fn_cst (integer_zero_node);
2067 ovb = affine_fn_cst (integer_zero_node);
2068 if (xz_p)
2069 {
2070 affine_fn t0 = ova1;
2071 affine_fn t2 = ovb;
2072
2073 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2074 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2075 affine_fn_free (t0);
2076 affine_fn_free (t2);
2077 *last_conflicts = last_conflicts_xz;
2078 }
2079 if (yz_p)
2080 {
2081 affine_fn t0 = ova2;
2082 affine_fn t2 = ovb;
2083
2084 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2085 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2086 affine_fn_free (t0);
2087 affine_fn_free (t2);
2088 *last_conflicts = last_conflicts_yz;
2089 }
2090 if (xyz_p)
2091 {
2092 affine_fn t0 = ova1;
2093 affine_fn t2 = ova2;
2094 affine_fn t4 = ovb;
2095
2096 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2097 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2098 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2099 affine_fn_free (t0);
2100 affine_fn_free (t2);
2101 affine_fn_free (t4);
2102 *last_conflicts = last_conflicts_xyz;
2103 }
2104 *overlaps_a = conflict_fn (2, ova1, ova2);
2105 *overlaps_b = conflict_fn (1, ovb);
2106 }
2107 else
2108 {
2109 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2110 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2111 *last_conflicts = integer_zero_node;
2112 }
2113
2114 affine_fn_free (overlaps_a_xz);
2115 affine_fn_free (overlaps_b_xz);
2116 affine_fn_free (overlaps_a_yz);
2117 affine_fn_free (overlaps_b_yz);
2118 affine_fn_free (overlaps_a_xyz);
2119 affine_fn_free (overlaps_b_xyz);
2120 }
2121
2122 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2123
2124 static void
2125 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2126 int size)
2127 {
2128 memcpy (vec2, vec1, size * sizeof (*vec1));
2129 }
2130
2131 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2132
2133 static void
2134 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2135 int m, int n)
2136 {
2137 int i;
2138
2139 for (i = 0; i < m; i++)
2140 lambda_vector_copy (mat1[i], mat2[i], n);
2141 }
2142
2143 /* Store the N x N identity matrix in MAT. */
2144
2145 static void
2146 lambda_matrix_id (lambda_matrix mat, int size)
2147 {
2148 int i, j;
2149
2150 for (i = 0; i < size; i++)
2151 for (j = 0; j < size; j++)
2152 mat[i][j] = (i == j) ? 1 : 0;
2153 }
2154
2155 /* Return the first nonzero element of vector VEC1 between START and N.
2156 We must have START <= N. Returns N if VEC1 is the zero vector. */
2157
2158 static int
2159 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2160 {
2161 int j = start;
2162 while (j < n && vec1[j] == 0)
2163 j++;
2164 return j;
2165 }
2166
2167 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2168 R2 = R2 + CONST1 * R1. */
2169
2170 static void
2171 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2172 {
2173 int i;
2174
2175 if (const1 == 0)
2176 return;
2177
2178 for (i = 0; i < n; i++)
2179 mat[r2][i] += const1 * mat[r1][i];
2180 }
2181
2182 /* Swap rows R1 and R2 in matrix MAT. */
2183
2184 static void
2185 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2186 {
2187 lambda_vector row;
2188
2189 row = mat[r1];
2190 mat[r1] = mat[r2];
2191 mat[r2] = row;
2192 }
2193
2194 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2195 and store the result in VEC2. */
2196
2197 static void
2198 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2199 int size, int const1)
2200 {
2201 int i;
2202
2203 if (const1 == 0)
2204 lambda_vector_clear (vec2, size);
2205 else
2206 for (i = 0; i < size; i++)
2207 vec2[i] = const1 * vec1[i];
2208 }
2209
2210 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2211
2212 static void
2213 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2214 int size)
2215 {
2216 lambda_vector_mult_const (vec1, vec2, size, -1);
2217 }
2218
2219 /* Negate row R1 of matrix MAT which has N columns. */
2220
2221 static void
2222 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2223 {
2224 lambda_vector_negate (mat[r1], mat[r1], n);
2225 }
2226
2227 /* Return true if two vectors are equal. */
2228
2229 static bool
2230 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2231 {
2232 int i;
2233 for (i = 0; i < size; i++)
2234 if (vec1[i] != vec2[i])
2235 return false;
2236 return true;
2237 }
2238
2239 /* Given an M x N integer matrix A, this function determines an M x
2240 M unimodular matrix U, and an M x N echelon matrix S such that
2241 "U.A = S". This decomposition is also known as "right Hermite".
2242
2243 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2244 Restructuring Compilers" Utpal Banerjee. */
2245
2246 static void
2247 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2248 lambda_matrix S, lambda_matrix U)
2249 {
2250 int i, j, i0 = 0;
2251
2252 lambda_matrix_copy (A, S, m, n);
2253 lambda_matrix_id (U, m);
2254
2255 for (j = 0; j < n; j++)
2256 {
2257 if (lambda_vector_first_nz (S[j], m, i0) < m)
2258 {
2259 ++i0;
2260 for (i = m - 1; i >= i0; i--)
2261 {
2262 while (S[i][j] != 0)
2263 {
2264 int sigma, factor, a, b;
2265
2266 a = S[i-1][j];
2267 b = S[i][j];
2268 sigma = (a * b < 0) ? -1: 1;
2269 a = abs (a);
2270 b = abs (b);
2271 factor = sigma * (a / b);
2272
2273 lambda_matrix_row_add (S, n, i, i-1, -factor);
2274 lambda_matrix_row_exchange (S, i, i-1);
2275
2276 lambda_matrix_row_add (U, m, i, i-1, -factor);
2277 lambda_matrix_row_exchange (U, i, i-1);
2278 }
2279 }
2280 }
2281 }
2282 }
2283
2284 /* Determines the overlapping elements due to accesses CHREC_A and
2285 CHREC_B, that are affine functions. This function cannot handle
2286 symbolic evolution functions, ie. when initial conditions are
2287 parameters, because it uses lambda matrices of integers. */
2288
2289 static void
2290 analyze_subscript_affine_affine (tree chrec_a,
2291 tree chrec_b,
2292 conflict_function **overlaps_a,
2293 conflict_function **overlaps_b,
2294 tree *last_conflicts)
2295 {
2296 unsigned nb_vars_a, nb_vars_b, dim;
2297 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2298 lambda_matrix A, U, S;
2299 struct obstack scratch_obstack;
2300
2301 if (eq_evolutions_p (chrec_a, chrec_b))
2302 {
2303 /* The accessed index overlaps for each iteration in the
2304 loop. */
2305 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2306 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2307 *last_conflicts = chrec_dont_know;
2308 return;
2309 }
2310 if (dump_file && (dump_flags & TDF_DETAILS))
2311 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2312
2313 /* For determining the initial intersection, we have to solve a
2314 Diophantine equation. This is the most time consuming part.
2315
2316 For answering to the question: "Is there a dependence?" we have
2317 to prove that there exists a solution to the Diophantine
2318 equation, and that the solution is in the iteration domain,
2319 i.e. the solution is positive or zero, and that the solution
2320 happens before the upper bound loop.nb_iterations. Otherwise
2321 there is no dependence. This function outputs a description of
2322 the iterations that hold the intersections. */
2323
2324 nb_vars_a = nb_vars_in_chrec (chrec_a);
2325 nb_vars_b = nb_vars_in_chrec (chrec_b);
2326
2327 gcc_obstack_init (&scratch_obstack);
2328
2329 dim = nb_vars_a + nb_vars_b;
2330 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2331 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2332 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2333
2334 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2335 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2336 gamma = init_b - init_a;
2337
2338 /* Don't do all the hard work of solving the Diophantine equation
2339 when we already know the solution: for example,
2340 | {3, +, 1}_1
2341 | {3, +, 4}_2
2342 | gamma = 3 - 3 = 0.
2343 Then the first overlap occurs during the first iterations:
2344 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2345 */
2346 if (gamma == 0)
2347 {
2348 if (nb_vars_a == 1 && nb_vars_b == 1)
2349 {
2350 HOST_WIDE_INT step_a, step_b;
2351 HOST_WIDE_INT niter, niter_a, niter_b;
2352 affine_fn ova, ovb;
2353
2354 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2355 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2356 niter = MIN (niter_a, niter_b);
2357 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2358 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2359
2360 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2361 &ova, &ovb,
2362 last_conflicts, 1);
2363 *overlaps_a = conflict_fn (1, ova);
2364 *overlaps_b = conflict_fn (1, ovb);
2365 }
2366
2367 else if (nb_vars_a == 2 && nb_vars_b == 1)
2368 compute_overlap_steps_for_affine_1_2
2369 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2370
2371 else if (nb_vars_a == 1 && nb_vars_b == 2)
2372 compute_overlap_steps_for_affine_1_2
2373 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2374
2375 else
2376 {
2377 if (dump_file && (dump_flags & TDF_DETAILS))
2378 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2379 *overlaps_a = conflict_fn_not_known ();
2380 *overlaps_b = conflict_fn_not_known ();
2381 *last_conflicts = chrec_dont_know;
2382 }
2383 goto end_analyze_subs_aa;
2384 }
2385
2386 /* U.A = S */
2387 lambda_matrix_right_hermite (A, dim, 1, S, U);
2388
2389 if (S[0][0] < 0)
2390 {
2391 S[0][0] *= -1;
2392 lambda_matrix_row_negate (U, dim, 0);
2393 }
2394 gcd_alpha_beta = S[0][0];
2395
2396 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2397 but that is a quite strange case. Instead of ICEing, answer
2398 don't know. */
2399 if (gcd_alpha_beta == 0)
2400 {
2401 *overlaps_a = conflict_fn_not_known ();
2402 *overlaps_b = conflict_fn_not_known ();
2403 *last_conflicts = chrec_dont_know;
2404 goto end_analyze_subs_aa;
2405 }
2406
2407 /* The classic "gcd-test". */
2408 if (!int_divides_p (gcd_alpha_beta, gamma))
2409 {
2410 /* The "gcd-test" has determined that there is no integer
2411 solution, i.e. there is no dependence. */
2412 *overlaps_a = conflict_fn_no_dependence ();
2413 *overlaps_b = conflict_fn_no_dependence ();
2414 *last_conflicts = integer_zero_node;
2415 }
2416
2417 /* Both access functions are univariate. This includes SIV and MIV cases. */
2418 else if (nb_vars_a == 1 && nb_vars_b == 1)
2419 {
2420 /* Both functions should have the same evolution sign. */
2421 if (((A[0][0] > 0 && -A[1][0] > 0)
2422 || (A[0][0] < 0 && -A[1][0] < 0)))
2423 {
2424 /* The solutions are given by:
2425 |
2426 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2427 | [u21 u22] [y0]
2428
2429 For a given integer t. Using the following variables,
2430
2431 | i0 = u11 * gamma / gcd_alpha_beta
2432 | j0 = u12 * gamma / gcd_alpha_beta
2433 | i1 = u21
2434 | j1 = u22
2435
2436 the solutions are:
2437
2438 | x0 = i0 + i1 * t,
2439 | y0 = j0 + j1 * t. */
2440 HOST_WIDE_INT i0, j0, i1, j1;
2441
2442 i0 = U[0][0] * gamma / gcd_alpha_beta;
2443 j0 = U[0][1] * gamma / gcd_alpha_beta;
2444 i1 = U[1][0];
2445 j1 = U[1][1];
2446
2447 if ((i1 == 0 && i0 < 0)
2448 || (j1 == 0 && j0 < 0))
2449 {
2450 /* There is no solution.
2451 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2452 falls in here, but for the moment we don't look at the
2453 upper bound of the iteration domain. */
2454 *overlaps_a = conflict_fn_no_dependence ();
2455 *overlaps_b = conflict_fn_no_dependence ();
2456 *last_conflicts = integer_zero_node;
2457 goto end_analyze_subs_aa;
2458 }
2459
2460 if (i1 > 0 && j1 > 0)
2461 {
2462 HOST_WIDE_INT niter_a = max_stmt_executions_int
2463 (get_chrec_loop (chrec_a), true);
2464 HOST_WIDE_INT niter_b = max_stmt_executions_int
2465 (get_chrec_loop (chrec_b), true);
2466 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2467
2468 /* (X0, Y0) is a solution of the Diophantine equation:
2469 "chrec_a (X0) = chrec_b (Y0)". */
2470 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2471 CEIL (-j0, j1));
2472 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2473 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2474
2475 /* (X1, Y1) is the smallest positive solution of the eq
2476 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2477 first conflict occurs. */
2478 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2479 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2480 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2481
2482 if (niter > 0)
2483 {
2484 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2485 FLOOR_DIV (niter - j0, j1));
2486 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2487
2488 /* If the overlap occurs outside of the bounds of the
2489 loop, there is no dependence. */
2490 if (x1 >= niter || y1 >= niter)
2491 {
2492 *overlaps_a = conflict_fn_no_dependence ();
2493 *overlaps_b = conflict_fn_no_dependence ();
2494 *last_conflicts = integer_zero_node;
2495 goto end_analyze_subs_aa;
2496 }
2497 else
2498 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2499 }
2500 else
2501 *last_conflicts = chrec_dont_know;
2502
2503 *overlaps_a
2504 = conflict_fn (1,
2505 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2506 1,
2507 build_int_cst (NULL_TREE, i1)));
2508 *overlaps_b
2509 = conflict_fn (1,
2510 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2511 1,
2512 build_int_cst (NULL_TREE, j1)));
2513 }
2514 else
2515 {
2516 /* FIXME: For the moment, the upper bound of the
2517 iteration domain for i and j is not checked. */
2518 if (dump_file && (dump_flags & TDF_DETAILS))
2519 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2520 *overlaps_a = conflict_fn_not_known ();
2521 *overlaps_b = conflict_fn_not_known ();
2522 *last_conflicts = chrec_dont_know;
2523 }
2524 }
2525 else
2526 {
2527 if (dump_file && (dump_flags & TDF_DETAILS))
2528 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2529 *overlaps_a = conflict_fn_not_known ();
2530 *overlaps_b = conflict_fn_not_known ();
2531 *last_conflicts = chrec_dont_know;
2532 }
2533 }
2534 else
2535 {
2536 if (dump_file && (dump_flags & TDF_DETAILS))
2537 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2538 *overlaps_a = conflict_fn_not_known ();
2539 *overlaps_b = conflict_fn_not_known ();
2540 *last_conflicts = chrec_dont_know;
2541 }
2542
2543 end_analyze_subs_aa:
2544 obstack_free (&scratch_obstack, NULL);
2545 if (dump_file && (dump_flags & TDF_DETAILS))
2546 {
2547 fprintf (dump_file, " (overlaps_a = ");
2548 dump_conflict_function (dump_file, *overlaps_a);
2549 fprintf (dump_file, ")\n (overlaps_b = ");
2550 dump_conflict_function (dump_file, *overlaps_b);
2551 fprintf (dump_file, ")\n");
2552 fprintf (dump_file, ")\n");
2553 }
2554 }
2555
2556 /* Returns true when analyze_subscript_affine_affine can be used for
2557 determining the dependence relation between chrec_a and chrec_b,
2558 that contain symbols. This function modifies chrec_a and chrec_b
2559 such that the analysis result is the same, and such that they don't
2560 contain symbols, and then can safely be passed to the analyzer.
2561
2562 Example: The analysis of the following tuples of evolutions produce
2563 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2564 vs. {0, +, 1}_1
2565
2566 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2567 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2568 */
2569
2570 static bool
2571 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2572 {
2573 tree diff, type, left_a, left_b, right_b;
2574
2575 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2576 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2577 /* FIXME: For the moment not handled. Might be refined later. */
2578 return false;
2579
2580 type = chrec_type (*chrec_a);
2581 left_a = CHREC_LEFT (*chrec_a);
2582 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2583 diff = chrec_fold_minus (type, left_a, left_b);
2584
2585 if (!evolution_function_is_constant_p (diff))
2586 return false;
2587
2588 if (dump_file && (dump_flags & TDF_DETAILS))
2589 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2590
2591 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2592 diff, CHREC_RIGHT (*chrec_a));
2593 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2594 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2595 build_int_cst (type, 0),
2596 right_b);
2597 return true;
2598 }
2599
2600 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2601 *OVERLAPS_B are initialized to the functions that describe the
2602 relation between the elements accessed twice by CHREC_A and
2603 CHREC_B. For k >= 0, the following property is verified:
2604
2605 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2606
2607 static void
2608 analyze_siv_subscript (tree chrec_a,
2609 tree chrec_b,
2610 conflict_function **overlaps_a,
2611 conflict_function **overlaps_b,
2612 tree *last_conflicts,
2613 int loop_nest_num)
2614 {
2615 dependence_stats.num_siv++;
2616
2617 if (dump_file && (dump_flags & TDF_DETAILS))
2618 fprintf (dump_file, "(analyze_siv_subscript \n");
2619
2620 if (evolution_function_is_constant_p (chrec_a)
2621 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2622 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2623 overlaps_a, overlaps_b, last_conflicts);
2624
2625 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2626 && evolution_function_is_constant_p (chrec_b))
2627 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2628 overlaps_b, overlaps_a, last_conflicts);
2629
2630 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2631 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2632 {
2633 if (!chrec_contains_symbols (chrec_a)
2634 && !chrec_contains_symbols (chrec_b))
2635 {
2636 analyze_subscript_affine_affine (chrec_a, chrec_b,
2637 overlaps_a, overlaps_b,
2638 last_conflicts);
2639
2640 if (CF_NOT_KNOWN_P (*overlaps_a)
2641 || CF_NOT_KNOWN_P (*overlaps_b))
2642 dependence_stats.num_siv_unimplemented++;
2643 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2644 || CF_NO_DEPENDENCE_P (*overlaps_b))
2645 dependence_stats.num_siv_independent++;
2646 else
2647 dependence_stats.num_siv_dependent++;
2648 }
2649 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2650 &chrec_b))
2651 {
2652 analyze_subscript_affine_affine (chrec_a, chrec_b,
2653 overlaps_a, overlaps_b,
2654 last_conflicts);
2655
2656 if (CF_NOT_KNOWN_P (*overlaps_a)
2657 || CF_NOT_KNOWN_P (*overlaps_b))
2658 dependence_stats.num_siv_unimplemented++;
2659 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2660 || CF_NO_DEPENDENCE_P (*overlaps_b))
2661 dependence_stats.num_siv_independent++;
2662 else
2663 dependence_stats.num_siv_dependent++;
2664 }
2665 else
2666 goto siv_subscript_dontknow;
2667 }
2668
2669 else
2670 {
2671 siv_subscript_dontknow:;
2672 if (dump_file && (dump_flags & TDF_DETAILS))
2673 fprintf (dump_file, "siv test failed: unimplemented.\n");
2674 *overlaps_a = conflict_fn_not_known ();
2675 *overlaps_b = conflict_fn_not_known ();
2676 *last_conflicts = chrec_dont_know;
2677 dependence_stats.num_siv_unimplemented++;
2678 }
2679
2680 if (dump_file && (dump_flags & TDF_DETAILS))
2681 fprintf (dump_file, ")\n");
2682 }
2683
2684 /* Returns false if we can prove that the greatest common divisor of the steps
2685 of CHREC does not divide CST, false otherwise. */
2686
2687 static bool
2688 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2689 {
2690 HOST_WIDE_INT cd = 0, val;
2691 tree step;
2692
2693 if (!host_integerp (cst, 0))
2694 return true;
2695 val = tree_low_cst (cst, 0);
2696
2697 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2698 {
2699 step = CHREC_RIGHT (chrec);
2700 if (!host_integerp (step, 0))
2701 return true;
2702 cd = gcd (cd, tree_low_cst (step, 0));
2703 chrec = CHREC_LEFT (chrec);
2704 }
2705
2706 return val % cd == 0;
2707 }
2708
2709 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2710 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2711 functions that describe the relation between the elements accessed
2712 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2713 is verified:
2714
2715 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2716
2717 static void
2718 analyze_miv_subscript (tree chrec_a,
2719 tree chrec_b,
2720 conflict_function **overlaps_a,
2721 conflict_function **overlaps_b,
2722 tree *last_conflicts,
2723 struct loop *loop_nest)
2724 {
2725 tree type, difference;
2726
2727 dependence_stats.num_miv++;
2728 if (dump_file && (dump_flags & TDF_DETAILS))
2729 fprintf (dump_file, "(analyze_miv_subscript \n");
2730
2731 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2732 chrec_a = chrec_convert (type, chrec_a, NULL);
2733 chrec_b = chrec_convert (type, chrec_b, NULL);
2734 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2735
2736 if (eq_evolutions_p (chrec_a, chrec_b))
2737 {
2738 /* Access functions are the same: all the elements are accessed
2739 in the same order. */
2740 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2741 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2742 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2743 dependence_stats.num_miv_dependent++;
2744 }
2745
2746 else if (evolution_function_is_constant_p (difference)
2747 /* For the moment, the following is verified:
2748 evolution_function_is_affine_multivariate_p (chrec_a,
2749 loop_nest->num) */
2750 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2751 {
2752 /* testsuite/.../ssa-chrec-33.c
2753 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2754
2755 The difference is 1, and all the evolution steps are multiples
2756 of 2, consequently there are no overlapping elements. */
2757 *overlaps_a = conflict_fn_no_dependence ();
2758 *overlaps_b = conflict_fn_no_dependence ();
2759 *last_conflicts = integer_zero_node;
2760 dependence_stats.num_miv_independent++;
2761 }
2762
2763 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2764 && !chrec_contains_symbols (chrec_a)
2765 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2766 && !chrec_contains_symbols (chrec_b))
2767 {
2768 /* testsuite/.../ssa-chrec-35.c
2769 {0, +, 1}_2 vs. {0, +, 1}_3
2770 the overlapping elements are respectively located at iterations:
2771 {0, +, 1}_x and {0, +, 1}_x,
2772 in other words, we have the equality:
2773 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2774
2775 Other examples:
2776 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2777 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2778
2779 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2780 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2781 */
2782 analyze_subscript_affine_affine (chrec_a, chrec_b,
2783 overlaps_a, overlaps_b, last_conflicts);
2784
2785 if (CF_NOT_KNOWN_P (*overlaps_a)
2786 || CF_NOT_KNOWN_P (*overlaps_b))
2787 dependence_stats.num_miv_unimplemented++;
2788 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2789 || CF_NO_DEPENDENCE_P (*overlaps_b))
2790 dependence_stats.num_miv_independent++;
2791 else
2792 dependence_stats.num_miv_dependent++;
2793 }
2794
2795 else
2796 {
2797 /* When the analysis is too difficult, answer "don't know". */
2798 if (dump_file && (dump_flags & TDF_DETAILS))
2799 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2800
2801 *overlaps_a = conflict_fn_not_known ();
2802 *overlaps_b = conflict_fn_not_known ();
2803 *last_conflicts = chrec_dont_know;
2804 dependence_stats.num_miv_unimplemented++;
2805 }
2806
2807 if (dump_file && (dump_flags & TDF_DETAILS))
2808 fprintf (dump_file, ")\n");
2809 }
2810
2811 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2812 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2813 OVERLAP_ITERATIONS_B are initialized with two functions that
2814 describe the iterations that contain conflicting elements.
2815
2816 Remark: For an integer k >= 0, the following equality is true:
2817
2818 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2819 */
2820
2821 static void
2822 analyze_overlapping_iterations (tree chrec_a,
2823 tree chrec_b,
2824 conflict_function **overlap_iterations_a,
2825 conflict_function **overlap_iterations_b,
2826 tree *last_conflicts, struct loop *loop_nest)
2827 {
2828 unsigned int lnn = loop_nest->num;
2829
2830 dependence_stats.num_subscript_tests++;
2831
2832 if (dump_file && (dump_flags & TDF_DETAILS))
2833 {
2834 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2835 fprintf (dump_file, " (chrec_a = ");
2836 print_generic_expr (dump_file, chrec_a, 0);
2837 fprintf (dump_file, ")\n (chrec_b = ");
2838 print_generic_expr (dump_file, chrec_b, 0);
2839 fprintf (dump_file, ")\n");
2840 }
2841
2842 if (chrec_a == NULL_TREE
2843 || chrec_b == NULL_TREE
2844 || chrec_contains_undetermined (chrec_a)
2845 || chrec_contains_undetermined (chrec_b))
2846 {
2847 dependence_stats.num_subscript_undetermined++;
2848
2849 *overlap_iterations_a = conflict_fn_not_known ();
2850 *overlap_iterations_b = conflict_fn_not_known ();
2851 }
2852
2853 /* If they are the same chrec, and are affine, they overlap
2854 on every iteration. */
2855 else if (eq_evolutions_p (chrec_a, chrec_b)
2856 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2857 || operand_equal_p (chrec_a, chrec_b, 0)))
2858 {
2859 dependence_stats.num_same_subscript_function++;
2860 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2861 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2862 *last_conflicts = chrec_dont_know;
2863 }
2864
2865 /* If they aren't the same, and aren't affine, we can't do anything
2866 yet. */
2867 else if ((chrec_contains_symbols (chrec_a)
2868 || chrec_contains_symbols (chrec_b))
2869 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2870 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2871 {
2872 dependence_stats.num_subscript_undetermined++;
2873 *overlap_iterations_a = conflict_fn_not_known ();
2874 *overlap_iterations_b = conflict_fn_not_known ();
2875 }
2876
2877 else if (ziv_subscript_p (chrec_a, chrec_b))
2878 analyze_ziv_subscript (chrec_a, chrec_b,
2879 overlap_iterations_a, overlap_iterations_b,
2880 last_conflicts);
2881
2882 else if (siv_subscript_p (chrec_a, chrec_b))
2883 analyze_siv_subscript (chrec_a, chrec_b,
2884 overlap_iterations_a, overlap_iterations_b,
2885 last_conflicts, lnn);
2886
2887 else
2888 analyze_miv_subscript (chrec_a, chrec_b,
2889 overlap_iterations_a, overlap_iterations_b,
2890 last_conflicts, loop_nest);
2891
2892 if (dump_file && (dump_flags & TDF_DETAILS))
2893 {
2894 fprintf (dump_file, " (overlap_iterations_a = ");
2895 dump_conflict_function (dump_file, *overlap_iterations_a);
2896 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2897 dump_conflict_function (dump_file, *overlap_iterations_b);
2898 fprintf (dump_file, ")\n");
2899 fprintf (dump_file, ")\n");
2900 }
2901 }
2902
2903 /* Helper function for uniquely inserting distance vectors. */
2904
2905 static void
2906 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2907 {
2908 unsigned i;
2909 lambda_vector v;
2910
2911 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2912 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2913 return;
2914
2915 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2916 }
2917
2918 /* Helper function for uniquely inserting direction vectors. */
2919
2920 static void
2921 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2922 {
2923 unsigned i;
2924 lambda_vector v;
2925
2926 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2927 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2928 return;
2929
2930 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2931 }
2932
2933 /* Add a distance of 1 on all the loops outer than INDEX. If we
2934 haven't yet determined a distance for this outer loop, push a new
2935 distance vector composed of the previous distance, and a distance
2936 of 1 for this outer loop. Example:
2937
2938 | loop_1
2939 | loop_2
2940 | A[10]
2941 | endloop_2
2942 | endloop_1
2943
2944 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2945 save (0, 1), then we have to save (1, 0). */
2946
2947 static void
2948 add_outer_distances (struct data_dependence_relation *ddr,
2949 lambda_vector dist_v, int index)
2950 {
2951 /* For each outer loop where init_v is not set, the accesses are
2952 in dependence of distance 1 in the loop. */
2953 while (--index >= 0)
2954 {
2955 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2956 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2957 save_v[index] = 1;
2958 save_dist_v (ddr, save_v);
2959 }
2960 }
2961
2962 /* Return false when fail to represent the data dependence as a
2963 distance vector. INIT_B is set to true when a component has been
2964 added to the distance vector DIST_V. INDEX_CARRY is then set to
2965 the index in DIST_V that carries the dependence. */
2966
2967 static bool
2968 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2969 struct data_reference *ddr_a,
2970 struct data_reference *ddr_b,
2971 lambda_vector dist_v, bool *init_b,
2972 int *index_carry)
2973 {
2974 unsigned i;
2975 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2976
2977 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2978 {
2979 tree access_fn_a, access_fn_b;
2980 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2981
2982 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2983 {
2984 non_affine_dependence_relation (ddr);
2985 return false;
2986 }
2987
2988 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2989 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2990
2991 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2992 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2993 {
2994 int dist, index;
2995 int var_a = CHREC_VARIABLE (access_fn_a);
2996 int var_b = CHREC_VARIABLE (access_fn_b);
2997
2998 if (var_a != var_b
2999 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3000 {
3001 non_affine_dependence_relation (ddr);
3002 return false;
3003 }
3004
3005 dist = int_cst_value (SUB_DISTANCE (subscript));
3006 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3007 *index_carry = MIN (index, *index_carry);
3008
3009 /* This is the subscript coupling test. If we have already
3010 recorded a distance for this loop (a distance coming from
3011 another subscript), it should be the same. For example,
3012 in the following code, there is no dependence:
3013
3014 | loop i = 0, N, 1
3015 | T[i+1][i] = ...
3016 | ... = T[i][i]
3017 | endloop
3018 */
3019 if (init_v[index] != 0 && dist_v[index] != dist)
3020 {
3021 finalize_ddr_dependent (ddr, chrec_known);
3022 return false;
3023 }
3024
3025 dist_v[index] = dist;
3026 init_v[index] = 1;
3027 *init_b = true;
3028 }
3029 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3030 {
3031 /* This can be for example an affine vs. constant dependence
3032 (T[i] vs. T[3]) that is not an affine dependence and is
3033 not representable as a distance vector. */
3034 non_affine_dependence_relation (ddr);
3035 return false;
3036 }
3037 }
3038
3039 return true;
3040 }
3041
3042 /* Return true when the DDR contains only constant access functions. */
3043
3044 static bool
3045 constant_access_functions (const struct data_dependence_relation *ddr)
3046 {
3047 unsigned i;
3048
3049 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3050 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3051 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3052 return false;
3053
3054 return true;
3055 }
3056
3057 /* Helper function for the case where DDR_A and DDR_B are the same
3058 multivariate access function with a constant step. For an example
3059 see pr34635-1.c. */
3060
3061 static void
3062 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3063 {
3064 int x_1, x_2;
3065 tree c_1 = CHREC_LEFT (c_2);
3066 tree c_0 = CHREC_LEFT (c_1);
3067 lambda_vector dist_v;
3068 int v1, v2, cd;
3069
3070 /* Polynomials with more than 2 variables are not handled yet. When
3071 the evolution steps are parameters, it is not possible to
3072 represent the dependence using classical distance vectors. */
3073 if (TREE_CODE (c_0) != INTEGER_CST
3074 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3075 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3076 {
3077 DDR_AFFINE_P (ddr) = false;
3078 return;
3079 }
3080
3081 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3082 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3083
3084 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3085 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3086 v1 = int_cst_value (CHREC_RIGHT (c_1));
3087 v2 = int_cst_value (CHREC_RIGHT (c_2));
3088 cd = gcd (v1, v2);
3089 v1 /= cd;
3090 v2 /= cd;
3091
3092 if (v2 < 0)
3093 {
3094 v2 = -v2;
3095 v1 = -v1;
3096 }
3097
3098 dist_v[x_1] = v2;
3099 dist_v[x_2] = -v1;
3100 save_dist_v (ddr, dist_v);
3101
3102 add_outer_distances (ddr, dist_v, x_1);
3103 }
3104
3105 /* Helper function for the case where DDR_A and DDR_B are the same
3106 access functions. */
3107
3108 static void
3109 add_other_self_distances (struct data_dependence_relation *ddr)
3110 {
3111 lambda_vector dist_v;
3112 unsigned i;
3113 int index_carry = DDR_NB_LOOPS (ddr);
3114
3115 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3116 {
3117 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3118
3119 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3120 {
3121 if (!evolution_function_is_univariate_p (access_fun))
3122 {
3123 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3124 {
3125 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3126 return;
3127 }
3128
3129 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3130
3131 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3132 add_multivariate_self_dist (ddr, access_fun);
3133 else
3134 /* The evolution step is not constant: it varies in
3135 the outer loop, so this cannot be represented by a
3136 distance vector. For example in pr34635.c the
3137 evolution is {0, +, {0, +, 4}_1}_2. */
3138 DDR_AFFINE_P (ddr) = false;
3139
3140 return;
3141 }
3142
3143 index_carry = MIN (index_carry,
3144 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3145 DDR_LOOP_NEST (ddr)));
3146 }
3147 }
3148
3149 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3150 add_outer_distances (ddr, dist_v, index_carry);
3151 }
3152
3153 static void
3154 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3155 {
3156 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3157
3158 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3159 save_dist_v (ddr, dist_v);
3160 }
3161
3162 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3163 is the case for example when access functions are the same and
3164 equal to a constant, as in:
3165
3166 | loop_1
3167 | A[3] = ...
3168 | ... = A[3]
3169 | endloop_1
3170
3171 in which case the distance vectors are (0) and (1). */
3172
3173 static void
3174 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3175 {
3176 unsigned i, j;
3177
3178 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3179 {
3180 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3181 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3182 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3183
3184 for (j = 0; j < ca->n; j++)
3185 if (affine_function_zero_p (ca->fns[j]))
3186 {
3187 insert_innermost_unit_dist_vector (ddr);
3188 return;
3189 }
3190
3191 for (j = 0; j < cb->n; j++)
3192 if (affine_function_zero_p (cb->fns[j]))
3193 {
3194 insert_innermost_unit_dist_vector (ddr);
3195 return;
3196 }
3197 }
3198 }
3199
3200 /* Compute the classic per loop distance vector. DDR is the data
3201 dependence relation to build a vector from. Return false when fail
3202 to represent the data dependence as a distance vector. */
3203
3204 static bool
3205 build_classic_dist_vector (struct data_dependence_relation *ddr,
3206 struct loop *loop_nest)
3207 {
3208 bool init_b = false;
3209 int index_carry = DDR_NB_LOOPS (ddr);
3210 lambda_vector dist_v;
3211
3212 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3213 return false;
3214
3215 if (same_access_functions (ddr))
3216 {
3217 /* Save the 0 vector. */
3218 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3219 save_dist_v (ddr, dist_v);
3220
3221 if (constant_access_functions (ddr))
3222 add_distance_for_zero_overlaps (ddr);
3223
3224 if (DDR_NB_LOOPS (ddr) > 1)
3225 add_other_self_distances (ddr);
3226
3227 return true;
3228 }
3229
3230 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3231 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3232 dist_v, &init_b, &index_carry))
3233 return false;
3234
3235 /* Save the distance vector if we initialized one. */
3236 if (init_b)
3237 {
3238 /* Verify a basic constraint: classic distance vectors should
3239 always be lexicographically positive.
3240
3241 Data references are collected in the order of execution of
3242 the program, thus for the following loop
3243
3244 | for (i = 1; i < 100; i++)
3245 | for (j = 1; j < 100; j++)
3246 | {
3247 | t = T[j+1][i-1]; // A
3248 | T[j][i] = t + 2; // B
3249 | }
3250
3251 references are collected following the direction of the wind:
3252 A then B. The data dependence tests are performed also
3253 following this order, such that we're looking at the distance
3254 separating the elements accessed by A from the elements later
3255 accessed by B. But in this example, the distance returned by
3256 test_dep (A, B) is lexicographically negative (-1, 1), that
3257 means that the access A occurs later than B with respect to
3258 the outer loop, ie. we're actually looking upwind. In this
3259 case we solve test_dep (B, A) looking downwind to the
3260 lexicographically positive solution, that returns the
3261 distance vector (1, -1). */
3262 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3263 {
3264 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3265 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3266 loop_nest))
3267 return false;
3268 compute_subscript_distance (ddr);
3269 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3270 save_v, &init_b, &index_carry))
3271 return false;
3272 save_dist_v (ddr, save_v);
3273 DDR_REVERSED_P (ddr) = true;
3274
3275 /* In this case there is a dependence forward for all the
3276 outer loops:
3277
3278 | for (k = 1; k < 100; k++)
3279 | for (i = 1; i < 100; i++)
3280 | for (j = 1; j < 100; j++)
3281 | {
3282 | t = T[j+1][i-1]; // A
3283 | T[j][i] = t + 2; // B
3284 | }
3285
3286 the vectors are:
3287 (0, 1, -1)
3288 (1, 1, -1)
3289 (1, -1, 1)
3290 */
3291 if (DDR_NB_LOOPS (ddr) > 1)
3292 {
3293 add_outer_distances (ddr, save_v, index_carry);
3294 add_outer_distances (ddr, dist_v, index_carry);
3295 }
3296 }
3297 else
3298 {
3299 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3300 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3301
3302 if (DDR_NB_LOOPS (ddr) > 1)
3303 {
3304 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3305
3306 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3307 DDR_A (ddr), loop_nest))
3308 return false;
3309 compute_subscript_distance (ddr);
3310 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3311 opposite_v, &init_b,
3312 &index_carry))
3313 return false;
3314
3315 save_dist_v (ddr, save_v);
3316 add_outer_distances (ddr, dist_v, index_carry);
3317 add_outer_distances (ddr, opposite_v, index_carry);
3318 }
3319 else
3320 save_dist_v (ddr, save_v);
3321 }
3322 }
3323 else
3324 {
3325 /* There is a distance of 1 on all the outer loops: Example:
3326 there is a dependence of distance 1 on loop_1 for the array A.
3327
3328 | loop_1
3329 | A[5] = ...
3330 | endloop
3331 */
3332 add_outer_distances (ddr, dist_v,
3333 lambda_vector_first_nz (dist_v,
3334 DDR_NB_LOOPS (ddr), 0));
3335 }
3336
3337 if (dump_file && (dump_flags & TDF_DETAILS))
3338 {
3339 unsigned i;
3340
3341 fprintf (dump_file, "(build_classic_dist_vector\n");
3342 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3343 {
3344 fprintf (dump_file, " dist_vector = (");
3345 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3346 DDR_NB_LOOPS (ddr));
3347 fprintf (dump_file, " )\n");
3348 }
3349 fprintf (dump_file, ")\n");
3350 }
3351
3352 return true;
3353 }
3354
3355 /* Return the direction for a given distance.
3356 FIXME: Computing dir this way is suboptimal, since dir can catch
3357 cases that dist is unable to represent. */
3358
3359 static inline enum data_dependence_direction
3360 dir_from_dist (int dist)
3361 {
3362 if (dist > 0)
3363 return dir_positive;
3364 else if (dist < 0)
3365 return dir_negative;
3366 else
3367 return dir_equal;
3368 }
3369
3370 /* Compute the classic per loop direction vector. DDR is the data
3371 dependence relation to build a vector from. */
3372
3373 static void
3374 build_classic_dir_vector (struct data_dependence_relation *ddr)
3375 {
3376 unsigned i, j;
3377 lambda_vector dist_v;
3378
3379 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3380 {
3381 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3382
3383 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3384 dir_v[j] = dir_from_dist (dist_v[j]);
3385
3386 save_dir_v (ddr, dir_v);
3387 }
3388 }
3389
3390 /* Helper function. Returns true when there is a dependence between
3391 data references DRA and DRB. */
3392
3393 static bool
3394 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3395 struct data_reference *dra,
3396 struct data_reference *drb,
3397 struct loop *loop_nest)
3398 {
3399 unsigned int i;
3400 tree last_conflicts;
3401 struct subscript *subscript;
3402
3403 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3404 i++)
3405 {
3406 conflict_function *overlaps_a, *overlaps_b;
3407
3408 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3409 DR_ACCESS_FN (drb, i),
3410 &overlaps_a, &overlaps_b,
3411 &last_conflicts, loop_nest);
3412
3413 if (CF_NOT_KNOWN_P (overlaps_a)
3414 || CF_NOT_KNOWN_P (overlaps_b))
3415 {
3416 finalize_ddr_dependent (ddr, chrec_dont_know);
3417 dependence_stats.num_dependence_undetermined++;
3418 free_conflict_function (overlaps_a);
3419 free_conflict_function (overlaps_b);
3420 return false;
3421 }
3422
3423 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3424 || CF_NO_DEPENDENCE_P (overlaps_b))
3425 {
3426 finalize_ddr_dependent (ddr, chrec_known);
3427 dependence_stats.num_dependence_independent++;
3428 free_conflict_function (overlaps_a);
3429 free_conflict_function (overlaps_b);
3430 return false;
3431 }
3432
3433 else
3434 {
3435 if (SUB_CONFLICTS_IN_A (subscript))
3436 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3437 if (SUB_CONFLICTS_IN_B (subscript))
3438 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3439
3440 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3441 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3442 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3443 }
3444 }
3445
3446 return true;
3447 }
3448
3449 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3450
3451 static void
3452 subscript_dependence_tester (struct data_dependence_relation *ddr,
3453 struct loop *loop_nest)
3454 {
3455
3456 if (dump_file && (dump_flags & TDF_DETAILS))
3457 fprintf (dump_file, "(subscript_dependence_tester \n");
3458
3459 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3460 dependence_stats.num_dependence_dependent++;
3461
3462 compute_subscript_distance (ddr);
3463 if (build_classic_dist_vector (ddr, loop_nest))
3464 build_classic_dir_vector (ddr);
3465
3466 if (dump_file && (dump_flags & TDF_DETAILS))
3467 fprintf (dump_file, ")\n");
3468 }
3469
3470 /* Returns true when all the access functions of A are affine or
3471 constant with respect to LOOP_NEST. */
3472
3473 static bool
3474 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3475 const struct loop *loop_nest)
3476 {
3477 unsigned int i;
3478 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3479 tree t;
3480
3481 FOR_EACH_VEC_ELT (tree, fns, i, t)
3482 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3483 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3484 return false;
3485
3486 return true;
3487 }
3488
3489 /* Initializes an equation for an OMEGA problem using the information
3490 contained in the ACCESS_FUN. Returns true when the operation
3491 succeeded.
3492
3493 PB is the omega constraint system.
3494 EQ is the number of the equation to be initialized.
3495 OFFSET is used for shifting the variables names in the constraints:
3496 a constrain is composed of 2 * the number of variables surrounding
3497 dependence accesses. OFFSET is set either to 0 for the first n variables,
3498 then it is set to n.
3499 ACCESS_FUN is expected to be an affine chrec. */
3500
3501 static bool
3502 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3503 unsigned int offset, tree access_fun,
3504 struct data_dependence_relation *ddr)
3505 {
3506 switch (TREE_CODE (access_fun))
3507 {
3508 case POLYNOMIAL_CHREC:
3509 {
3510 tree left = CHREC_LEFT (access_fun);
3511 tree right = CHREC_RIGHT (access_fun);
3512 int var = CHREC_VARIABLE (access_fun);
3513 unsigned var_idx;
3514
3515 if (TREE_CODE (right) != INTEGER_CST)
3516 return false;
3517
3518 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3519 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3520
3521 /* Compute the innermost loop index. */
3522 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3523
3524 if (offset == 0)
3525 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3526 += int_cst_value (right);
3527
3528 switch (TREE_CODE (left))
3529 {
3530 case POLYNOMIAL_CHREC:
3531 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3532
3533 case INTEGER_CST:
3534 pb->eqs[eq].coef[0] += int_cst_value (left);
3535 return true;
3536
3537 default:
3538 return false;
3539 }
3540 }
3541
3542 case INTEGER_CST:
3543 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3544 return true;
3545
3546 default:
3547 return false;
3548 }
3549 }
3550
3551 /* As explained in the comments preceding init_omega_for_ddr, we have
3552 to set up a system for each loop level, setting outer loops
3553 variation to zero, and current loop variation to positive or zero.
3554 Save each lexico positive distance vector. */
3555
3556 static void
3557 omega_extract_distance_vectors (omega_pb pb,
3558 struct data_dependence_relation *ddr)
3559 {
3560 int eq, geq;
3561 unsigned i, j;
3562 struct loop *loopi, *loopj;
3563 enum omega_result res;
3564
3565 /* Set a new problem for each loop in the nest. The basis is the
3566 problem that we have initialized until now. On top of this we
3567 add new constraints. */
3568 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3569 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3570 {
3571 int dist = 0;
3572 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3573 DDR_NB_LOOPS (ddr));
3574
3575 omega_copy_problem (copy, pb);
3576
3577 /* For all the outer loops "loop_j", add "dj = 0". */
3578 for (j = 0;
3579 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3580 {
3581 eq = omega_add_zero_eq (copy, omega_black);
3582 copy->eqs[eq].coef[j + 1] = 1;
3583 }
3584
3585 /* For "loop_i", add "0 <= di". */
3586 geq = omega_add_zero_geq (copy, omega_black);
3587 copy->geqs[geq].coef[i + 1] = 1;
3588
3589 /* Reduce the constraint system, and test that the current
3590 problem is feasible. */
3591 res = omega_simplify_problem (copy);
3592 if (res == omega_false
3593 || res == omega_unknown
3594 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3595 goto next_problem;
3596
3597 for (eq = 0; eq < copy->num_subs; eq++)
3598 if (copy->subs[eq].key == (int) i + 1)
3599 {
3600 dist = copy->subs[eq].coef[0];
3601 goto found_dist;
3602 }
3603
3604 if (dist == 0)
3605 {
3606 /* Reinitialize problem... */
3607 omega_copy_problem (copy, pb);
3608 for (j = 0;
3609 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3610 {
3611 eq = omega_add_zero_eq (copy, omega_black);
3612 copy->eqs[eq].coef[j + 1] = 1;
3613 }
3614
3615 /* ..., but this time "di = 1". */
3616 eq = omega_add_zero_eq (copy, omega_black);
3617 copy->eqs[eq].coef[i + 1] = 1;
3618 copy->eqs[eq].coef[0] = -1;
3619
3620 res = omega_simplify_problem (copy);
3621 if (res == omega_false
3622 || res == omega_unknown
3623 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3624 goto next_problem;
3625
3626 for (eq = 0; eq < copy->num_subs; eq++)
3627 if (copy->subs[eq].key == (int) i + 1)
3628 {
3629 dist = copy->subs[eq].coef[0];
3630 goto found_dist;
3631 }
3632 }
3633
3634 found_dist:;
3635 /* Save the lexicographically positive distance vector. */
3636 if (dist >= 0)
3637 {
3638 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3639 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3640
3641 dist_v[i] = dist;
3642
3643 for (eq = 0; eq < copy->num_subs; eq++)
3644 if (copy->subs[eq].key > 0)
3645 {
3646 dist = copy->subs[eq].coef[0];
3647 dist_v[copy->subs[eq].key - 1] = dist;
3648 }
3649
3650 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3651 dir_v[j] = dir_from_dist (dist_v[j]);
3652
3653 save_dist_v (ddr, dist_v);
3654 save_dir_v (ddr, dir_v);
3655 }
3656
3657 next_problem:;
3658 omega_free_problem (copy);
3659 }
3660 }
3661
3662 /* This is called for each subscript of a tuple of data references:
3663 insert an equality for representing the conflicts. */
3664
3665 static bool
3666 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3667 struct data_dependence_relation *ddr,
3668 omega_pb pb, bool *maybe_dependent)
3669 {
3670 int eq;
3671 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3672 TREE_TYPE (access_fun_b));
3673 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3674 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3675 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3676 tree minus_one;
3677
3678 /* When the fun_a - fun_b is not constant, the dependence is not
3679 captured by the classic distance vector representation. */
3680 if (TREE_CODE (difference) != INTEGER_CST)
3681 return false;
3682
3683 /* ZIV test. */
3684 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3685 {
3686 /* There is no dependence. */
3687 *maybe_dependent = false;
3688 return true;
3689 }
3690
3691 minus_one = build_int_cst (type, -1);
3692 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3693
3694 eq = omega_add_zero_eq (pb, omega_black);
3695 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3696 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3697 /* There is probably a dependence, but the system of
3698 constraints cannot be built: answer "don't know". */
3699 return false;
3700
3701 /* GCD test. */
3702 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3703 && !int_divides_p (lambda_vector_gcd
3704 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3705 2 * DDR_NB_LOOPS (ddr)),
3706 pb->eqs[eq].coef[0]))
3707 {
3708 /* There is no dependence. */
3709 *maybe_dependent = false;
3710 return true;
3711 }
3712
3713 return true;
3714 }
3715
3716 /* Helper function, same as init_omega_for_ddr but specialized for
3717 data references A and B. */
3718
3719 static bool
3720 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3721 struct data_dependence_relation *ddr,
3722 omega_pb pb, bool *maybe_dependent)
3723 {
3724 unsigned i;
3725 int ineq;
3726 struct loop *loopi;
3727 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3728
3729 /* Insert an equality per subscript. */
3730 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3731 {
3732 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3733 ddr, pb, maybe_dependent))
3734 return false;
3735 else if (*maybe_dependent == false)
3736 {
3737 /* There is no dependence. */
3738 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3739 return true;
3740 }
3741 }
3742
3743 /* Insert inequalities: constraints corresponding to the iteration
3744 domain, i.e. the loops surrounding the references "loop_x" and
3745 the distance variables "dx". The layout of the OMEGA
3746 representation is as follows:
3747 - coef[0] is the constant
3748 - coef[1..nb_loops] are the protected variables that will not be
3749 removed by the solver: the "dx"
3750 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3751 */
3752 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3753 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3754 {
3755 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3756
3757 /* 0 <= loop_x */
3758 ineq = omega_add_zero_geq (pb, omega_black);
3759 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3760
3761 /* 0 <= loop_x + dx */
3762 ineq = omega_add_zero_geq (pb, omega_black);
3763 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3764 pb->geqs[ineq].coef[i + 1] = 1;
3765
3766 if (nbi != -1)
3767 {
3768 /* loop_x <= nb_iters */
3769 ineq = omega_add_zero_geq (pb, omega_black);
3770 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3771 pb->geqs[ineq].coef[0] = nbi;
3772
3773 /* loop_x + dx <= nb_iters */
3774 ineq = omega_add_zero_geq (pb, omega_black);
3775 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3776 pb->geqs[ineq].coef[i + 1] = -1;
3777 pb->geqs[ineq].coef[0] = nbi;
3778
3779 /* A step "dx" bigger than nb_iters is not feasible, so
3780 add "0 <= nb_iters + dx", */
3781 ineq = omega_add_zero_geq (pb, omega_black);
3782 pb->geqs[ineq].coef[i + 1] = 1;
3783 pb->geqs[ineq].coef[0] = nbi;
3784 /* and "dx <= nb_iters". */
3785 ineq = omega_add_zero_geq (pb, omega_black);
3786 pb->geqs[ineq].coef[i + 1] = -1;
3787 pb->geqs[ineq].coef[0] = nbi;
3788 }
3789 }
3790
3791 omega_extract_distance_vectors (pb, ddr);
3792
3793 return true;
3794 }
3795
3796 /* Sets up the Omega dependence problem for the data dependence
3797 relation DDR. Returns false when the constraint system cannot be
3798 built, ie. when the test answers "don't know". Returns true
3799 otherwise, and when independence has been proved (using one of the
3800 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3801 set MAYBE_DEPENDENT to true.
3802
3803 Example: for setting up the dependence system corresponding to the
3804 conflicting accesses
3805
3806 | loop_i
3807 | loop_j
3808 | A[i, i+1] = ...
3809 | ... A[2*j, 2*(i + j)]
3810 | endloop_j
3811 | endloop_i
3812
3813 the following constraints come from the iteration domain:
3814
3815 0 <= i <= Ni
3816 0 <= i + di <= Ni
3817 0 <= j <= Nj
3818 0 <= j + dj <= Nj
3819
3820 where di, dj are the distance variables. The constraints
3821 representing the conflicting elements are:
3822
3823 i = 2 * (j + dj)
3824 i + 1 = 2 * (i + di + j + dj)
3825
3826 For asking that the resulting distance vector (di, dj) be
3827 lexicographically positive, we insert the constraint "di >= 0". If
3828 "di = 0" in the solution, we fix that component to zero, and we
3829 look at the inner loops: we set a new problem where all the outer
3830 loop distances are zero, and fix this inner component to be
3831 positive. When one of the components is positive, we save that
3832 distance, and set a new problem where the distance on this loop is
3833 zero, searching for other distances in the inner loops. Here is
3834 the classic example that illustrates that we have to set for each
3835 inner loop a new problem:
3836
3837 | loop_1
3838 | loop_2
3839 | A[10]
3840 | endloop_2
3841 | endloop_1
3842
3843 we have to save two distances (1, 0) and (0, 1).
3844
3845 Given two array references, refA and refB, we have to set the
3846 dependence problem twice, refA vs. refB and refB vs. refA, and we
3847 cannot do a single test, as refB might occur before refA in the
3848 inner loops, and the contrary when considering outer loops: ex.
3849
3850 | loop_0
3851 | loop_1
3852 | loop_2
3853 | T[{1,+,1}_2][{1,+,1}_1] // refA
3854 | T[{2,+,1}_2][{0,+,1}_1] // refB
3855 | endloop_2
3856 | endloop_1
3857 | endloop_0
3858
3859 refB touches the elements in T before refA, and thus for the same
3860 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3861 but for successive loop_0 iterations, we have (1, -1, 1)
3862
3863 The Omega solver expects the distance variables ("di" in the
3864 previous example) to come first in the constraint system (as
3865 variables to be protected, or "safe" variables), the constraint
3866 system is built using the following layout:
3867
3868 "cst | distance vars | index vars".
3869 */
3870
3871 static bool
3872 init_omega_for_ddr (struct data_dependence_relation *ddr,
3873 bool *maybe_dependent)
3874 {
3875 omega_pb pb;
3876 bool res = false;
3877
3878 *maybe_dependent = true;
3879
3880 if (same_access_functions (ddr))
3881 {
3882 unsigned j;
3883 lambda_vector dir_v;
3884
3885 /* Save the 0 vector. */
3886 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3887 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3888 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3889 dir_v[j] = dir_equal;
3890 save_dir_v (ddr, dir_v);
3891
3892 /* Save the dependences carried by outer loops. */
3893 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3894 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3895 maybe_dependent);
3896 omega_free_problem (pb);
3897 return res;
3898 }
3899
3900 /* Omega expects the protected variables (those that have to be kept
3901 after elimination) to appear first in the constraint system.
3902 These variables are the distance variables. In the following
3903 initialization we declare NB_LOOPS safe variables, and the total
3904 number of variables for the constraint system is 2*NB_LOOPS. */
3905 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3906 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3907 maybe_dependent);
3908 omega_free_problem (pb);
3909
3910 /* Stop computation if not decidable, or no dependence. */
3911 if (res == false || *maybe_dependent == false)
3912 return res;
3913
3914 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3915 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3916 maybe_dependent);
3917 omega_free_problem (pb);
3918
3919 return res;
3920 }
3921
3922 /* Return true when DDR contains the same information as that stored
3923 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3924
3925 static bool
3926 ddr_consistent_p (FILE *file,
3927 struct data_dependence_relation *ddr,
3928 VEC (lambda_vector, heap) *dist_vects,
3929 VEC (lambda_vector, heap) *dir_vects)
3930 {
3931 unsigned int i, j;
3932
3933 /* If dump_file is set, output there. */
3934 if (dump_file && (dump_flags & TDF_DETAILS))
3935 file = dump_file;
3936
3937 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3938 {
3939 lambda_vector b_dist_v;
3940 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3941 VEC_length (lambda_vector, dist_vects),
3942 DDR_NUM_DIST_VECTS (ddr));
3943
3944 fprintf (file, "Banerjee dist vectors:\n");
3945 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3946 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3947
3948 fprintf (file, "Omega dist vectors:\n");
3949 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3950 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3951
3952 fprintf (file, "data dependence relation:\n");
3953 dump_data_dependence_relation (file, ddr);
3954
3955 fprintf (file, ")\n");
3956 return false;
3957 }
3958
3959 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3960 {
3961 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3962 VEC_length (lambda_vector, dir_vects),
3963 DDR_NUM_DIR_VECTS (ddr));
3964 return false;
3965 }
3966
3967 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3968 {
3969 lambda_vector a_dist_v;
3970 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3971
3972 /* Distance vectors are not ordered in the same way in the DDR
3973 and in the DIST_VECTS: search for a matching vector. */
3974 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3975 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3976 break;
3977
3978 if (j == VEC_length (lambda_vector, dist_vects))
3979 {
3980 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3981 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3982 fprintf (file, "not found in Omega dist vectors:\n");
3983 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3984 fprintf (file, "data dependence relation:\n");
3985 dump_data_dependence_relation (file, ddr);
3986 fprintf (file, ")\n");
3987 }
3988 }
3989
3990 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3991 {
3992 lambda_vector a_dir_v;
3993 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3994
3995 /* Direction vectors are not ordered in the same way in the DDR
3996 and in the DIR_VECTS: search for a matching vector. */
3997 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3998 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3999 break;
4000
4001 if (j == VEC_length (lambda_vector, dist_vects))
4002 {
4003 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4004 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4005 fprintf (file, "not found in Omega dir vectors:\n");
4006 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4007 fprintf (file, "data dependence relation:\n");
4008 dump_data_dependence_relation (file, ddr);
4009 fprintf (file, ")\n");
4010 }
4011 }
4012
4013 return true;
4014 }
4015
4016 /* This computes the affine dependence relation between A and B with
4017 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4018 independence between two accesses, while CHREC_DONT_KNOW is used
4019 for representing the unknown relation.
4020
4021 Note that it is possible to stop the computation of the dependence
4022 relation the first time we detect a CHREC_KNOWN element for a given
4023 subscript. */
4024
4025 static void
4026 compute_affine_dependence (struct data_dependence_relation *ddr,
4027 struct loop *loop_nest)
4028 {
4029 struct data_reference *dra = DDR_A (ddr);
4030 struct data_reference *drb = DDR_B (ddr);
4031
4032 if (dump_file && (dump_flags & TDF_DETAILS))
4033 {
4034 fprintf (dump_file, "(compute_affine_dependence\n");
4035 fprintf (dump_file, " (stmt_a = \n");
4036 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
4037 fprintf (dump_file, ")\n (stmt_b = \n");
4038 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
4039 fprintf (dump_file, ")\n");
4040 }
4041
4042 /* Analyze only when the dependence relation is not yet known. */
4043 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
4044 && !DDR_SELF_REFERENCE (ddr))
4045 {
4046 dependence_stats.num_dependence_tests++;
4047
4048 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4049 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4050 {
4051 if (flag_check_data_deps)
4052 {
4053 /* Compute the dependences using the first algorithm. */
4054 subscript_dependence_tester (ddr, loop_nest);
4055
4056 if (dump_file && (dump_flags & TDF_DETAILS))
4057 {
4058 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4059 dump_data_dependence_relation (dump_file, ddr);
4060 }
4061
4062 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4063 {
4064 bool maybe_dependent;
4065 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4066
4067 /* Save the result of the first DD analyzer. */
4068 dist_vects = DDR_DIST_VECTS (ddr);
4069 dir_vects = DDR_DIR_VECTS (ddr);
4070
4071 /* Reset the information. */
4072 DDR_DIST_VECTS (ddr) = NULL;
4073 DDR_DIR_VECTS (ddr) = NULL;
4074
4075 /* Compute the same information using Omega. */
4076 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4077 goto csys_dont_know;
4078
4079 if (dump_file && (dump_flags & TDF_DETAILS))
4080 {
4081 fprintf (dump_file, "Omega Analyzer\n");
4082 dump_data_dependence_relation (dump_file, ddr);
4083 }
4084
4085 /* Check that we get the same information. */
4086 if (maybe_dependent)
4087 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4088 dir_vects));
4089 }
4090 }
4091 else
4092 subscript_dependence_tester (ddr, loop_nest);
4093 }
4094
4095 /* As a last case, if the dependence cannot be determined, or if
4096 the dependence is considered too difficult to determine, answer
4097 "don't know". */
4098 else
4099 {
4100 csys_dont_know:;
4101 dependence_stats.num_dependence_undetermined++;
4102
4103 if (dump_file && (dump_flags & TDF_DETAILS))
4104 {
4105 fprintf (dump_file, "Data ref a:\n");
4106 dump_data_reference (dump_file, dra);
4107 fprintf (dump_file, "Data ref b:\n");
4108 dump_data_reference (dump_file, drb);
4109 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4110 }
4111 finalize_ddr_dependent (ddr, chrec_dont_know);
4112 }
4113 }
4114
4115 if (dump_file && (dump_flags & TDF_DETAILS))
4116 fprintf (dump_file, ")\n");
4117 }
4118
4119 /* This computes the dependence relation for the same data
4120 reference into DDR. */
4121
4122 void
4123 compute_self_dependence (struct data_dependence_relation *ddr)
4124 {
4125 unsigned int i;
4126 struct subscript *subscript;
4127
4128 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4129 return;
4130
4131 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4132 i++)
4133 {
4134 if (SUB_CONFLICTS_IN_A (subscript))
4135 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4136 if (SUB_CONFLICTS_IN_B (subscript))
4137 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4138
4139 /* The accessed index overlaps for each iteration. */
4140 SUB_CONFLICTS_IN_A (subscript)
4141 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4142 SUB_CONFLICTS_IN_B (subscript)
4143 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4144 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4145 }
4146
4147 /* The distance vector is the zero vector. */
4148 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4149 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4150 }
4151
4152 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4153 the data references in DATAREFS, in the LOOP_NEST. When
4154 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4155 relations. */
4156
4157 void
4158 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4159 VEC (ddr_p, heap) **dependence_relations,
4160 VEC (loop_p, heap) *loop_nest,
4161 bool compute_self_and_rr)
4162 {
4163 struct data_dependence_relation *ddr;
4164 struct data_reference *a, *b;
4165 unsigned int i, j;
4166
4167 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4168 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4169 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4170 {
4171 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4172 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4173 if (loop_nest)
4174 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4175 }
4176
4177 if (compute_self_and_rr)
4178 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4179 {
4180 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4181 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4182 compute_self_dependence (ddr);
4183 }
4184 }
4185
4186 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4187 true if STMT clobbers memory, false otherwise. */
4188
4189 bool
4190 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4191 {
4192 bool clobbers_memory = false;
4193 data_ref_loc *ref;
4194 tree *op0, *op1;
4195 enum gimple_code stmt_code = gimple_code (stmt);
4196
4197 *references = NULL;
4198
4199 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4200 Calls have side-effects, except those to const or pure
4201 functions. */
4202 if ((stmt_code == GIMPLE_CALL
4203 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4204 || (stmt_code == GIMPLE_ASM
4205 && gimple_asm_volatile_p (stmt)))
4206 clobbers_memory = true;
4207
4208 if (!gimple_vuse (stmt))
4209 return clobbers_memory;
4210
4211 if (stmt_code == GIMPLE_ASSIGN)
4212 {
4213 tree base;
4214 op0 = gimple_assign_lhs_ptr (stmt);
4215 op1 = gimple_assign_rhs1_ptr (stmt);
4216
4217 if (DECL_P (*op1)
4218 || (REFERENCE_CLASS_P (*op1)
4219 && (base = get_base_address (*op1))
4220 && TREE_CODE (base) != SSA_NAME))
4221 {
4222 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4223 ref->pos = op1;
4224 ref->is_read = true;
4225 }
4226 }
4227 else if (stmt_code == GIMPLE_CALL)
4228 {
4229 unsigned i, n;
4230
4231 op0 = gimple_call_lhs_ptr (stmt);
4232 n = gimple_call_num_args (stmt);
4233 for (i = 0; i < n; i++)
4234 {
4235 op1 = gimple_call_arg_ptr (stmt, i);
4236
4237 if (DECL_P (*op1)
4238 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4239 {
4240 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4241 ref->pos = op1;
4242 ref->is_read = true;
4243 }
4244 }
4245 }
4246 else
4247 return clobbers_memory;
4248
4249 if (*op0
4250 && (DECL_P (*op0)
4251 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4252 {
4253 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4254 ref->pos = op0;
4255 ref->is_read = false;
4256 }
4257 return clobbers_memory;
4258 }
4259
4260 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4261 reference, returns false, otherwise returns true. NEST is the outermost
4262 loop of the loop nest in which the references should be analyzed. */
4263
4264 bool
4265 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4266 VEC (data_reference_p, heap) **datarefs)
4267 {
4268 unsigned i;
4269 VEC (data_ref_loc, heap) *references;
4270 data_ref_loc *ref;
4271 bool ret = true;
4272 data_reference_p dr;
4273
4274 if (get_references_in_stmt (stmt, &references))
4275 {
4276 VEC_free (data_ref_loc, heap, references);
4277 return false;
4278 }
4279
4280 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4281 {
4282 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4283 *ref->pos, stmt, ref->is_read);
4284 gcc_assert (dr != NULL);
4285 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4286 }
4287 VEC_free (data_ref_loc, heap, references);
4288 return ret;
4289 }
4290
4291 /* Stores the data references in STMT to DATAREFS. If there is an
4292 unanalyzable reference, returns false, otherwise returns true.
4293 NEST is the outermost loop of the loop nest in which the references
4294 should be instantiated, LOOP is the loop in which the references
4295 should be analyzed. */
4296
4297 bool
4298 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4299 VEC (data_reference_p, heap) **datarefs)
4300 {
4301 unsigned i;
4302 VEC (data_ref_loc, heap) *references;
4303 data_ref_loc *ref;
4304 bool ret = true;
4305 data_reference_p dr;
4306
4307 if (get_references_in_stmt (stmt, &references))
4308 {
4309 VEC_free (data_ref_loc, heap, references);
4310 return false;
4311 }
4312
4313 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4314 {
4315 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4316 gcc_assert (dr != NULL);
4317 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4318 }
4319
4320 VEC_free (data_ref_loc, heap, references);
4321 return ret;
4322 }
4323
4324 /* Search the data references in LOOP, and record the information into
4325 DATAREFS. Returns chrec_dont_know when failing to analyze a
4326 difficult case, returns NULL_TREE otherwise. */
4327
4328 tree
4329 find_data_references_in_bb (struct loop *loop, basic_block bb,
4330 VEC (data_reference_p, heap) **datarefs)
4331 {
4332 gimple_stmt_iterator bsi;
4333
4334 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4335 {
4336 gimple stmt = gsi_stmt (bsi);
4337
4338 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4339 {
4340 struct data_reference *res;
4341 res = XCNEW (struct data_reference);
4342 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4343
4344 return chrec_dont_know;
4345 }
4346 }
4347
4348 return NULL_TREE;
4349 }
4350
4351 /* Search the data references in LOOP, and record the information into
4352 DATAREFS. Returns chrec_dont_know when failing to analyze a
4353 difficult case, returns NULL_TREE otherwise.
4354
4355 TODO: This function should be made smarter so that it can handle address
4356 arithmetic as if they were array accesses, etc. */
4357
4358 tree
4359 find_data_references_in_loop (struct loop *loop,
4360 VEC (data_reference_p, heap) **datarefs)
4361 {
4362 basic_block bb, *bbs;
4363 unsigned int i;
4364
4365 bbs = get_loop_body_in_dom_order (loop);
4366
4367 for (i = 0; i < loop->num_nodes; i++)
4368 {
4369 bb = bbs[i];
4370
4371 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4372 {
4373 free (bbs);
4374 return chrec_dont_know;
4375 }
4376 }
4377 free (bbs);
4378
4379 return NULL_TREE;
4380 }
4381
4382 /* Recursive helper function. */
4383
4384 static bool
4385 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4386 {
4387 /* Inner loops of the nest should not contain siblings. Example:
4388 when there are two consecutive loops,
4389
4390 | loop_0
4391 | loop_1
4392 | A[{0, +, 1}_1]
4393 | endloop_1
4394 | loop_2
4395 | A[{0, +, 1}_2]
4396 | endloop_2
4397 | endloop_0
4398
4399 the dependence relation cannot be captured by the distance
4400 abstraction. */
4401 if (loop->next)
4402 return false;
4403
4404 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4405 if (loop->inner)
4406 return find_loop_nest_1 (loop->inner, loop_nest);
4407 return true;
4408 }
4409
4410 /* Return false when the LOOP is not well nested. Otherwise return
4411 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4412 contain the loops from the outermost to the innermost, as they will
4413 appear in the classic distance vector. */
4414
4415 bool
4416 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4417 {
4418 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4419 if (loop->inner)
4420 return find_loop_nest_1 (loop->inner, loop_nest);
4421 return true;
4422 }
4423
4424 /* Returns true when the data dependences have been computed, false otherwise.
4425 Given a loop nest LOOP, the following vectors are returned:
4426 DATAREFS is initialized to all the array elements contained in this loop,
4427 DEPENDENCE_RELATIONS contains the relations between the data references.
4428 Compute read-read and self relations if
4429 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4430
4431 bool
4432 compute_data_dependences_for_loop (struct loop *loop,
4433 bool compute_self_and_read_read_dependences,
4434 VEC (loop_p, heap) **loop_nest,
4435 VEC (data_reference_p, heap) **datarefs,
4436 VEC (ddr_p, heap) **dependence_relations)
4437 {
4438 bool res = true;
4439
4440 memset (&dependence_stats, 0, sizeof (dependence_stats));
4441
4442 /* If the loop nest is not well formed, or one of the data references
4443 is not computable, give up without spending time to compute other
4444 dependences. */
4445 if (!loop
4446 || !find_loop_nest (loop, loop_nest)
4447 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4448 {
4449 struct data_dependence_relation *ddr;
4450
4451 /* Insert a single relation into dependence_relations:
4452 chrec_dont_know. */
4453 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4454 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4455 res = false;
4456 }
4457 else
4458 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4459 compute_self_and_read_read_dependences);
4460
4461 if (dump_file && (dump_flags & TDF_STATS))
4462 {
4463 fprintf (dump_file, "Dependence tester statistics:\n");
4464
4465 fprintf (dump_file, "Number of dependence tests: %d\n",
4466 dependence_stats.num_dependence_tests);
4467 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4468 dependence_stats.num_dependence_dependent);
4469 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4470 dependence_stats.num_dependence_independent);
4471 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4472 dependence_stats.num_dependence_undetermined);
4473
4474 fprintf (dump_file, "Number of subscript tests: %d\n",
4475 dependence_stats.num_subscript_tests);
4476 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4477 dependence_stats.num_subscript_undetermined);
4478 fprintf (dump_file, "Number of same subscript function: %d\n",
4479 dependence_stats.num_same_subscript_function);
4480
4481 fprintf (dump_file, "Number of ziv tests: %d\n",
4482 dependence_stats.num_ziv);
4483 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4484 dependence_stats.num_ziv_dependent);
4485 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4486 dependence_stats.num_ziv_independent);
4487 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4488 dependence_stats.num_ziv_unimplemented);
4489
4490 fprintf (dump_file, "Number of siv tests: %d\n",
4491 dependence_stats.num_siv);
4492 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4493 dependence_stats.num_siv_dependent);
4494 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4495 dependence_stats.num_siv_independent);
4496 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4497 dependence_stats.num_siv_unimplemented);
4498
4499 fprintf (dump_file, "Number of miv tests: %d\n",
4500 dependence_stats.num_miv);
4501 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4502 dependence_stats.num_miv_dependent);
4503 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4504 dependence_stats.num_miv_independent);
4505 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4506 dependence_stats.num_miv_unimplemented);
4507 }
4508
4509 return res;
4510 }
4511
4512 /* Returns true when the data dependences for the basic block BB have been
4513 computed, false otherwise.
4514 DATAREFS is initialized to all the array elements contained in this basic
4515 block, DEPENDENCE_RELATIONS contains the relations between the data
4516 references. Compute read-read and self relations if
4517 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4518 bool
4519 compute_data_dependences_for_bb (basic_block bb,
4520 bool compute_self_and_read_read_dependences,
4521 VEC (data_reference_p, heap) **datarefs,
4522 VEC (ddr_p, heap) **dependence_relations)
4523 {
4524 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4525 return false;
4526
4527 compute_all_dependences (*datarefs, dependence_relations, NULL,
4528 compute_self_and_read_read_dependences);
4529 return true;
4530 }
4531
4532 /* Entry point (for testing only). Analyze all the data references
4533 and the dependence relations in LOOP.
4534
4535 The data references are computed first.
4536
4537 A relation on these nodes is represented by a complete graph. Some
4538 of the relations could be of no interest, thus the relations can be
4539 computed on demand.
4540
4541 In the following function we compute all the relations. This is
4542 just a first implementation that is here for:
4543 - for showing how to ask for the dependence relations,
4544 - for the debugging the whole dependence graph,
4545 - for the dejagnu testcases and maintenance.
4546
4547 It is possible to ask only for a part of the graph, avoiding to
4548 compute the whole dependence graph. The computed dependences are
4549 stored in a knowledge base (KB) such that later queries don't
4550 recompute the same information. The implementation of this KB is
4551 transparent to the optimizer, and thus the KB can be changed with a
4552 more efficient implementation, or the KB could be disabled. */
4553 static void
4554 analyze_all_data_dependences (struct loop *loop)
4555 {
4556 unsigned int i;
4557 int nb_data_refs = 10;
4558 VEC (data_reference_p, heap) *datarefs =
4559 VEC_alloc (data_reference_p, heap, nb_data_refs);
4560 VEC (ddr_p, heap) *dependence_relations =
4561 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4562 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4563
4564 /* Compute DDs on the whole function. */
4565 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4566 &dependence_relations);
4567
4568 if (dump_file)
4569 {
4570 dump_data_dependence_relations (dump_file, dependence_relations);
4571 fprintf (dump_file, "\n\n");
4572
4573 if (dump_flags & TDF_DETAILS)
4574 dump_dist_dir_vectors (dump_file, dependence_relations);
4575
4576 if (dump_flags & TDF_STATS)
4577 {
4578 unsigned nb_top_relations = 0;
4579 unsigned nb_bot_relations = 0;
4580 unsigned nb_chrec_relations = 0;
4581 struct data_dependence_relation *ddr;
4582
4583 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4584 {
4585 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4586 nb_top_relations++;
4587
4588 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4589 nb_bot_relations++;
4590
4591 else
4592 nb_chrec_relations++;
4593 }
4594
4595 gather_stats_on_scev_database ();
4596 }
4597 }
4598
4599 VEC_free (loop_p, heap, loop_nest);
4600 free_dependence_relations (dependence_relations);
4601 free_data_refs (datarefs);
4602 }
4603
4604 /* Computes all the data dependences and check that the results of
4605 several analyzers are the same. */
4606
4607 void
4608 tree_check_data_deps (void)
4609 {
4610 loop_iterator li;
4611 struct loop *loop_nest;
4612
4613 FOR_EACH_LOOP (li, loop_nest, 0)
4614 analyze_all_data_dependences (loop_nest);
4615 }
4616
4617 /* Free the memory used by a data dependence relation DDR. */
4618
4619 void
4620 free_dependence_relation (struct data_dependence_relation *ddr)
4621 {
4622 if (ddr == NULL)
4623 return;
4624
4625 if (DDR_SUBSCRIPTS (ddr))
4626 free_subscripts (DDR_SUBSCRIPTS (ddr));
4627 if (DDR_DIST_VECTS (ddr))
4628 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4629 if (DDR_DIR_VECTS (ddr))
4630 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4631
4632 free (ddr);
4633 }
4634
4635 /* Free the memory used by the data dependence relations from
4636 DEPENDENCE_RELATIONS. */
4637
4638 void
4639 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4640 {
4641 unsigned int i;
4642 struct data_dependence_relation *ddr;
4643
4644 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4645 if (ddr)
4646 free_dependence_relation (ddr);
4647
4648 VEC_free (ddr_p, heap, dependence_relations);
4649 }
4650
4651 /* Free the memory used by the data references from DATAREFS. */
4652
4653 void
4654 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4655 {
4656 unsigned int i;
4657 struct data_reference *dr;
4658
4659 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4660 free_data_ref (dr);
4661 VEC_free (data_reference_p, heap, datarefs);
4662 }
4663
4664 \f
4665
4666 /* Dump vertex I in RDG to FILE. */
4667
4668 void
4669 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4670 {
4671 struct vertex *v = &(rdg->vertices[i]);
4672 struct graph_edge *e;
4673
4674 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4675 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4676 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4677
4678 if (v->pred)
4679 for (e = v->pred; e; e = e->pred_next)
4680 fprintf (file, " %d", e->src);
4681
4682 fprintf (file, ") (out:");
4683
4684 if (v->succ)
4685 for (e = v->succ; e; e = e->succ_next)
4686 fprintf (file, " %d", e->dest);
4687
4688 fprintf (file, ")\n");
4689 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4690 fprintf (file, ")\n");
4691 }
4692
4693 /* Call dump_rdg_vertex on stderr. */
4694
4695 DEBUG_FUNCTION void
4696 debug_rdg_vertex (struct graph *rdg, int i)
4697 {
4698 dump_rdg_vertex (stderr, rdg, i);
4699 }
4700
4701 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4702 dumped vertices to that bitmap. */
4703
4704 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4705 {
4706 int i;
4707
4708 fprintf (file, "(%d\n", c);
4709
4710 for (i = 0; i < rdg->n_vertices; i++)
4711 if (rdg->vertices[i].component == c)
4712 {
4713 if (dumped)
4714 bitmap_set_bit (dumped, i);
4715
4716 dump_rdg_vertex (file, rdg, i);
4717 }
4718
4719 fprintf (file, ")\n");
4720 }
4721
4722 /* Call dump_rdg_vertex on stderr. */
4723
4724 DEBUG_FUNCTION void
4725 debug_rdg_component (struct graph *rdg, int c)
4726 {
4727 dump_rdg_component (stderr, rdg, c, NULL);
4728 }
4729
4730 /* Dump the reduced dependence graph RDG to FILE. */
4731
4732 void
4733 dump_rdg (FILE *file, struct graph *rdg)
4734 {
4735 int i;
4736 bitmap dumped = BITMAP_ALLOC (NULL);
4737
4738 fprintf (file, "(rdg\n");
4739
4740 for (i = 0; i < rdg->n_vertices; i++)
4741 if (!bitmap_bit_p (dumped, i))
4742 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4743
4744 fprintf (file, ")\n");
4745 BITMAP_FREE (dumped);
4746 }
4747
4748 /* Call dump_rdg on stderr. */
4749
4750 DEBUG_FUNCTION void
4751 debug_rdg (struct graph *rdg)
4752 {
4753 dump_rdg (stderr, rdg);
4754 }
4755
4756 static void
4757 dot_rdg_1 (FILE *file, struct graph *rdg)
4758 {
4759 int i;
4760
4761 fprintf (file, "digraph RDG {\n");
4762
4763 for (i = 0; i < rdg->n_vertices; i++)
4764 {
4765 struct vertex *v = &(rdg->vertices[i]);
4766 struct graph_edge *e;
4767
4768 /* Highlight reads from memory. */
4769 if (RDG_MEM_READS_STMT (rdg, i))
4770 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4771
4772 /* Highlight stores to memory. */
4773 if (RDG_MEM_WRITE_STMT (rdg, i))
4774 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4775
4776 if (v->succ)
4777 for (e = v->succ; e; e = e->succ_next)
4778 switch (RDGE_TYPE (e))
4779 {
4780 case input_dd:
4781 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4782 break;
4783
4784 case output_dd:
4785 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4786 break;
4787
4788 case flow_dd:
4789 /* These are the most common dependences: don't print these. */
4790 fprintf (file, "%d -> %d \n", i, e->dest);
4791 break;
4792
4793 case anti_dd:
4794 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4795 break;
4796
4797 default:
4798 gcc_unreachable ();
4799 }
4800 }
4801
4802 fprintf (file, "}\n\n");
4803 }
4804
4805 /* Display the Reduced Dependence Graph using dotty. */
4806 extern void dot_rdg (struct graph *);
4807
4808 DEBUG_FUNCTION void
4809 dot_rdg (struct graph *rdg)
4810 {
4811 /* When debugging, enable the following code. This cannot be used
4812 in production compilers because it calls "system". */
4813 #if 0
4814 FILE *file = fopen ("/tmp/rdg.dot", "w");
4815 gcc_assert (file != NULL);
4816
4817 dot_rdg_1 (file, rdg);
4818 fclose (file);
4819
4820 system ("dotty /tmp/rdg.dot &");
4821 #else
4822 dot_rdg_1 (stderr, rdg);
4823 #endif
4824 }
4825
4826 /* This structure is used for recording the mapping statement index in
4827 the RDG. */
4828
4829 struct GTY(()) rdg_vertex_info
4830 {
4831 gimple stmt;
4832 int index;
4833 };
4834
4835 /* Returns the index of STMT in RDG. */
4836
4837 int
4838 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4839 {
4840 struct rdg_vertex_info rvi, *slot;
4841
4842 rvi.stmt = stmt;
4843 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4844
4845 if (!slot)
4846 return -1;
4847
4848 return slot->index;
4849 }
4850
4851 /* Creates an edge in RDG for each distance vector from DDR. The
4852 order that we keep track of in the RDG is the order in which
4853 statements have to be executed. */
4854
4855 static void
4856 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4857 {
4858 struct graph_edge *e;
4859 int va, vb;
4860 data_reference_p dra = DDR_A (ddr);
4861 data_reference_p drb = DDR_B (ddr);
4862 unsigned level = ddr_dependence_level (ddr);
4863
4864 /* For non scalar dependences, when the dependence is REVERSED,
4865 statement B has to be executed before statement A. */
4866 if (level > 0
4867 && !DDR_REVERSED_P (ddr))
4868 {
4869 data_reference_p tmp = dra;
4870 dra = drb;
4871 drb = tmp;
4872 }
4873
4874 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4875 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4876
4877 if (va < 0 || vb < 0)
4878 return;
4879
4880 e = add_edge (rdg, va, vb);
4881 e->data = XNEW (struct rdg_edge);
4882
4883 RDGE_LEVEL (e) = level;
4884 RDGE_RELATION (e) = ddr;
4885
4886 /* Determines the type of the data dependence. */
4887 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4888 RDGE_TYPE (e) = input_dd;
4889 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4890 RDGE_TYPE (e) = output_dd;
4891 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4892 RDGE_TYPE (e) = flow_dd;
4893 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4894 RDGE_TYPE (e) = anti_dd;
4895 }
4896
4897 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4898 the index of DEF in RDG. */
4899
4900 static void
4901 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4902 {
4903 use_operand_p imm_use_p;
4904 imm_use_iterator iterator;
4905
4906 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4907 {
4908 struct graph_edge *e;
4909 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4910
4911 if (use < 0)
4912 continue;
4913
4914 e = add_edge (rdg, idef, use);
4915 e->data = XNEW (struct rdg_edge);
4916 RDGE_TYPE (e) = flow_dd;
4917 RDGE_RELATION (e) = NULL;
4918 }
4919 }
4920
4921 /* Creates the edges of the reduced dependence graph RDG. */
4922
4923 static void
4924 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4925 {
4926 int i;
4927 struct data_dependence_relation *ddr;
4928 def_operand_p def_p;
4929 ssa_op_iter iter;
4930
4931 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4932 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4933 create_rdg_edge_for_ddr (rdg, ddr);
4934
4935 for (i = 0; i < rdg->n_vertices; i++)
4936 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4937 iter, SSA_OP_DEF)
4938 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4939 }
4940
4941 /* Build the vertices of the reduced dependence graph RDG. */
4942
4943 void
4944 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4945 {
4946 int i, j;
4947 gimple stmt;
4948
4949 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4950 {
4951 VEC (data_ref_loc, heap) *references;
4952 data_ref_loc *ref;
4953 struct vertex *v = &(rdg->vertices[i]);
4954 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4955 struct rdg_vertex_info **slot;
4956
4957 rvi->stmt = stmt;
4958 rvi->index = i;
4959 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4960
4961 if (!*slot)
4962 *slot = rvi;
4963 else
4964 free (rvi);
4965
4966 v->data = XNEW (struct rdg_vertex);
4967 RDG_STMT (rdg, i) = stmt;
4968
4969 RDG_MEM_WRITE_STMT (rdg, i) = false;
4970 RDG_MEM_READS_STMT (rdg, i) = false;
4971 if (gimple_code (stmt) == GIMPLE_PHI)
4972 continue;
4973
4974 get_references_in_stmt (stmt, &references);
4975 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4976 if (!ref->is_read)
4977 RDG_MEM_WRITE_STMT (rdg, i) = true;
4978 else
4979 RDG_MEM_READS_STMT (rdg, i) = true;
4980
4981 VEC_free (data_ref_loc, heap, references);
4982 }
4983 }
4984
4985 /* Initialize STMTS with all the statements of LOOP. When
4986 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4987 which we discover statements is important as
4988 generate_loops_for_partition is using the same traversal for
4989 identifying statements. */
4990
4991 static void
4992 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4993 {
4994 unsigned int i;
4995 basic_block *bbs = get_loop_body_in_dom_order (loop);
4996
4997 for (i = 0; i < loop->num_nodes; i++)
4998 {
4999 basic_block bb = bbs[i];
5000 gimple_stmt_iterator bsi;
5001 gimple stmt;
5002
5003 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5004 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5005
5006 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5007 {
5008 stmt = gsi_stmt (bsi);
5009 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
5010 VEC_safe_push (gimple, heap, *stmts, stmt);
5011 }
5012 }
5013
5014 free (bbs);
5015 }
5016
5017 /* Returns true when all the dependences are computable. */
5018
5019 static bool
5020 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5021 {
5022 ddr_p ddr;
5023 unsigned int i;
5024
5025 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5026 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5027 return false;
5028
5029 return true;
5030 }
5031
5032 /* Computes a hash function for element ELT. */
5033
5034 static hashval_t
5035 hash_stmt_vertex_info (const void *elt)
5036 {
5037 const struct rdg_vertex_info *const rvi =
5038 (const struct rdg_vertex_info *) elt;
5039 gimple stmt = rvi->stmt;
5040
5041 return htab_hash_pointer (stmt);
5042 }
5043
5044 /* Compares database elements E1 and E2. */
5045
5046 static int
5047 eq_stmt_vertex_info (const void *e1, const void *e2)
5048 {
5049 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5050 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5051
5052 return elt1->stmt == elt2->stmt;
5053 }
5054
5055 /* Free the element E. */
5056
5057 static void
5058 hash_stmt_vertex_del (void *e)
5059 {
5060 free (e);
5061 }
5062
5063 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5064 statement of the loop nest, and one edge per data dependence or
5065 scalar dependence. */
5066
5067 struct graph *
5068 build_empty_rdg (int n_stmts)
5069 {
5070 int nb_data_refs = 10;
5071 struct graph *rdg = new_graph (n_stmts);
5072
5073 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5074 eq_stmt_vertex_info, hash_stmt_vertex_del);
5075 return rdg;
5076 }
5077
5078 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5079 statement of the loop nest, and one edge per data dependence or
5080 scalar dependence. */
5081
5082 struct graph *
5083 build_rdg (struct loop *loop,
5084 VEC (loop_p, heap) **loop_nest,
5085 VEC (ddr_p, heap) **dependence_relations,
5086 VEC (data_reference_p, heap) **datarefs)
5087 {
5088 struct graph *rdg = NULL;
5089 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5090
5091 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5092 dependence_relations);
5093
5094 if (known_dependences_p (*dependence_relations))
5095 {
5096 stmts_from_loop (loop, &stmts);
5097 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5098 create_rdg_vertices (rdg, stmts);
5099 create_rdg_edges (rdg, *dependence_relations);
5100 }
5101
5102 VEC_free (gimple, heap, stmts);
5103 return rdg;
5104 }
5105
5106 /* Free the reduced dependence graph RDG. */
5107
5108 void
5109 free_rdg (struct graph *rdg)
5110 {
5111 int i;
5112
5113 for (i = 0; i < rdg->n_vertices; i++)
5114 {
5115 struct vertex *v = &(rdg->vertices[i]);
5116 struct graph_edge *e;
5117
5118 for (e = v->succ; e; e = e->succ_next)
5119 free (e->data);
5120
5121 free (v->data);
5122 }
5123
5124 htab_delete (rdg->indices);
5125 free_graph (rdg);
5126 }
5127
5128 /* Initialize STMTS with all the statements of LOOP that contain a
5129 store to memory. */
5130
5131 void
5132 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5133 {
5134 unsigned int i;
5135 basic_block *bbs = get_loop_body_in_dom_order (loop);
5136
5137 for (i = 0; i < loop->num_nodes; i++)
5138 {
5139 basic_block bb = bbs[i];
5140 gimple_stmt_iterator bsi;
5141
5142 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5143 if (gimple_vdef (gsi_stmt (bsi)))
5144 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5145 }
5146
5147 free (bbs);
5148 }
5149
5150 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5151 that contains a data reference on its LHS with a stride of the same
5152 size as its unit type. */
5153
5154 bool
5155 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5156 {
5157 tree op0, op1;
5158 bool res;
5159 struct data_reference *dr;
5160
5161 if (!stmt
5162 || !gimple_vdef (stmt)
5163 || !is_gimple_assign (stmt)
5164 || !gimple_assign_single_p (stmt)
5165 || !(op1 = gimple_assign_rhs1 (stmt))
5166 || !(integer_zerop (op1) || real_zerop (op1)))
5167 return false;
5168
5169 dr = XCNEW (struct data_reference);
5170 op0 = gimple_assign_lhs (stmt);
5171
5172 DR_STMT (dr) = stmt;
5173 DR_REF (dr) = op0;
5174
5175 res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
5176 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5177
5178 free_data_ref (dr);
5179 return res;
5180 }
5181
5182 /* Initialize STMTS with all the statements of LOOP that contain a
5183 store to memory of the form "A[i] = 0". */
5184
5185 void
5186 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5187 {
5188 unsigned int i;
5189 basic_block bb;
5190 gimple_stmt_iterator si;
5191 gimple stmt;
5192 basic_block *bbs = get_loop_body_in_dom_order (loop);
5193
5194 for (i = 0; i < loop->num_nodes; i++)
5195 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5196 if ((stmt = gsi_stmt (si))
5197 && stmt_with_adjacent_zero_store_dr_p (stmt))
5198 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5199
5200 free (bbs);
5201 }
5202
5203 /* For a data reference REF, return the declaration of its base
5204 address or NULL_TREE if the base is not determined. */
5205
5206 static inline tree
5207 ref_base_address (gimple stmt, data_ref_loc *ref)
5208 {
5209 tree base = NULL_TREE;
5210 tree base_address;
5211 struct data_reference *dr = XCNEW (struct data_reference);
5212
5213 DR_STMT (dr) = stmt;
5214 DR_REF (dr) = *ref->pos;
5215 dr_analyze_innermost (dr, loop_containing_stmt (stmt));
5216 base_address = DR_BASE_ADDRESS (dr);
5217
5218 if (!base_address)
5219 goto end;
5220
5221 switch (TREE_CODE (base_address))
5222 {
5223 case ADDR_EXPR:
5224 base = TREE_OPERAND (base_address, 0);
5225 break;
5226
5227 default:
5228 base = base_address;
5229 break;
5230 }
5231
5232 end:
5233 free_data_ref (dr);
5234 return base;
5235 }
5236
5237 /* Determines whether the statement from vertex V of the RDG has a
5238 definition used outside the loop that contains this statement. */
5239
5240 bool
5241 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5242 {
5243 gimple stmt = RDG_STMT (rdg, v);
5244 struct loop *loop = loop_containing_stmt (stmt);
5245 use_operand_p imm_use_p;
5246 imm_use_iterator iterator;
5247 ssa_op_iter it;
5248 def_operand_p def_p;
5249
5250 if (!loop)
5251 return true;
5252
5253 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5254 {
5255 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5256 {
5257 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5258 return true;
5259 }
5260 }
5261
5262 return false;
5263 }
5264
5265 /* Determines whether statements S1 and S2 access to similar memory
5266 locations. Two memory accesses are considered similar when they
5267 have the same base address declaration, i.e. when their
5268 ref_base_address is the same. */
5269
5270 bool
5271 have_similar_memory_accesses (gimple s1, gimple s2)
5272 {
5273 bool res = false;
5274 unsigned i, j;
5275 VEC (data_ref_loc, heap) *refs1, *refs2;
5276 data_ref_loc *ref1, *ref2;
5277
5278 get_references_in_stmt (s1, &refs1);
5279 get_references_in_stmt (s2, &refs2);
5280
5281 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5282 {
5283 tree base1 = ref_base_address (s1, ref1);
5284
5285 if (base1)
5286 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5287 if (base1 == ref_base_address (s2, ref2))
5288 {
5289 res = true;
5290 goto end;
5291 }
5292 }
5293
5294 end:
5295 VEC_free (data_ref_loc, heap, refs1);
5296 VEC_free (data_ref_loc, heap, refs2);
5297 return res;
5298 }
5299
5300 /* Helper function for the hashtab. */
5301
5302 static int
5303 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5304 {
5305 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5306 CONST_CAST_GIMPLE ((const_gimple) s2));
5307 }
5308
5309 /* Helper function for the hashtab. */
5310
5311 static hashval_t
5312 ref_base_address_1 (const void *s)
5313 {
5314 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5315 unsigned i;
5316 VEC (data_ref_loc, heap) *refs;
5317 data_ref_loc *ref;
5318 hashval_t res = 0;
5319
5320 get_references_in_stmt (stmt, &refs);
5321
5322 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5323 if (!ref->is_read)
5324 {
5325 res = htab_hash_pointer (ref_base_address (stmt, ref));
5326 break;
5327 }
5328
5329 VEC_free (data_ref_loc, heap, refs);
5330 return res;
5331 }
5332
5333 /* Try to remove duplicated write data references from STMTS. */
5334
5335 void
5336 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5337 {
5338 unsigned i;
5339 gimple stmt;
5340 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5341 have_similar_memory_accesses_1, NULL);
5342
5343 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5344 {
5345 void **slot;
5346
5347 slot = htab_find_slot (seen, stmt, INSERT);
5348
5349 if (*slot)
5350 VEC_ordered_remove (gimple, *stmts, i);
5351 else
5352 {
5353 *slot = (void *) stmt;
5354 i++;
5355 }
5356 }
5357
5358 htab_delete (seen);
5359 }
5360
5361 /* Returns the index of PARAMETER in the parameters vector of the
5362 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5363
5364 int
5365 access_matrix_get_index_for_parameter (tree parameter,
5366 struct access_matrix *access_matrix)
5367 {
5368 int i;
5369 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5370 tree lambda_parameter;
5371
5372 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5373 if (lambda_parameter == parameter)
5374 return i + AM_NB_INDUCTION_VARS (access_matrix);
5375
5376 return -1;
5377 }