1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
49 - to define an interface to access this data.
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
62 has an integer solution x = 1 and y = -1.
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
78 #include "coretypes.h"
82 #include "double-int.h"
90 #include "fold-const.h"
92 #include "gimple-pretty-print.h"
95 #include "hard-reg-set.h"
98 #include "dominance.h"
100 #include "basic-block.h"
101 #include "tree-ssa-alias.h"
102 #include "internal-fn.h"
103 #include "gimple-expr.h"
106 #include "gimple-iterator.h"
107 #include "tree-ssa-loop-niter.h"
108 #include "tree-ssa-loop.h"
109 #include "tree-ssa.h"
111 #include "tree-data-ref.h"
112 #include "tree-scalar-evolution.h"
113 #include "dumpfile.h"
114 #include "langhooks.h"
115 #include "tree-affine.h"
118 static struct datadep_stats
120 int num_dependence_tests
;
121 int num_dependence_dependent
;
122 int num_dependence_independent
;
123 int num_dependence_undetermined
;
125 int num_subscript_tests
;
126 int num_subscript_undetermined
;
127 int num_same_subscript_function
;
130 int num_ziv_independent
;
131 int num_ziv_dependent
;
132 int num_ziv_unimplemented
;
135 int num_siv_independent
;
136 int num_siv_dependent
;
137 int num_siv_unimplemented
;
140 int num_miv_independent
;
141 int num_miv_dependent
;
142 int num_miv_unimplemented
;
145 static bool subscript_dependence_tester_1 (struct data_dependence_relation
*,
146 struct data_reference
*,
147 struct data_reference
*,
149 /* Returns true iff A divides B. */
152 tree_fold_divides_p (const_tree a
, const_tree b
)
154 gcc_assert (TREE_CODE (a
) == INTEGER_CST
);
155 gcc_assert (TREE_CODE (b
) == INTEGER_CST
);
156 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR
, b
, a
));
159 /* Returns true iff A divides B. */
162 int_divides_p (int a
, int b
)
164 return ((b
% a
) == 0);
169 /* Dump into FILE all the data references from DATAREFS. */
172 dump_data_references (FILE *file
, vec
<data_reference_p
> datarefs
)
175 struct data_reference
*dr
;
177 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
178 dump_data_reference (file
, dr
);
181 /* Unified dump into FILE all the data references from DATAREFS. */
184 debug (vec
<data_reference_p
> &ref
)
186 dump_data_references (stderr
, ref
);
190 debug (vec
<data_reference_p
> *ptr
)
195 fprintf (stderr
, "<nil>\n");
199 /* Dump into STDERR all the data references from DATAREFS. */
202 debug_data_references (vec
<data_reference_p
> datarefs
)
204 dump_data_references (stderr
, datarefs
);
207 /* Print to STDERR the data_reference DR. */
210 debug_data_reference (struct data_reference
*dr
)
212 dump_data_reference (stderr
, dr
);
215 /* Dump function for a DATA_REFERENCE structure. */
218 dump_data_reference (FILE *outf
,
219 struct data_reference
*dr
)
223 fprintf (outf
, "#(Data Ref: \n");
224 fprintf (outf
, "# bb: %d \n", gimple_bb (DR_STMT (dr
))->index
);
225 fprintf (outf
, "# stmt: ");
226 print_gimple_stmt (outf
, DR_STMT (dr
), 0, 0);
227 fprintf (outf
, "# ref: ");
228 print_generic_stmt (outf
, DR_REF (dr
), 0);
229 fprintf (outf
, "# base_object: ");
230 print_generic_stmt (outf
, DR_BASE_OBJECT (dr
), 0);
232 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
234 fprintf (outf
, "# Access function %d: ", i
);
235 print_generic_stmt (outf
, DR_ACCESS_FN (dr
, i
), 0);
237 fprintf (outf
, "#)\n");
240 /* Unified dump function for a DATA_REFERENCE structure. */
243 debug (data_reference
&ref
)
245 dump_data_reference (stderr
, &ref
);
249 debug (data_reference
*ptr
)
254 fprintf (stderr
, "<nil>\n");
258 /* Dumps the affine function described by FN to the file OUTF. */
261 dump_affine_function (FILE *outf
, affine_fn fn
)
266 print_generic_expr (outf
, fn
[0], TDF_SLIM
);
267 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
269 fprintf (outf
, " + ");
270 print_generic_expr (outf
, coef
, TDF_SLIM
);
271 fprintf (outf
, " * x_%u", i
);
275 /* Dumps the conflict function CF to the file OUTF. */
278 dump_conflict_function (FILE *outf
, conflict_function
*cf
)
282 if (cf
->n
== NO_DEPENDENCE
)
283 fprintf (outf
, "no dependence");
284 else if (cf
->n
== NOT_KNOWN
)
285 fprintf (outf
, "not known");
288 for (i
= 0; i
< cf
->n
; i
++)
293 dump_affine_function (outf
, cf
->fns
[i
]);
299 /* Dump function for a SUBSCRIPT structure. */
302 dump_subscript (FILE *outf
, struct subscript
*subscript
)
304 conflict_function
*cf
= SUB_CONFLICTS_IN_A (subscript
);
306 fprintf (outf
, "\n (subscript \n");
307 fprintf (outf
, " iterations_that_access_an_element_twice_in_A: ");
308 dump_conflict_function (outf
, cf
);
309 if (CF_NONTRIVIAL_P (cf
))
311 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
312 fprintf (outf
, "\n last_conflict: ");
313 print_generic_expr (outf
, last_iteration
, 0);
316 cf
= SUB_CONFLICTS_IN_B (subscript
);
317 fprintf (outf
, "\n iterations_that_access_an_element_twice_in_B: ");
318 dump_conflict_function (outf
, cf
);
319 if (CF_NONTRIVIAL_P (cf
))
321 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
322 fprintf (outf
, "\n last_conflict: ");
323 print_generic_expr (outf
, last_iteration
, 0);
326 fprintf (outf
, "\n (Subscript distance: ");
327 print_generic_expr (outf
, SUB_DISTANCE (subscript
), 0);
328 fprintf (outf
, " ))\n");
331 /* Print the classic direction vector DIRV to OUTF. */
334 print_direction_vector (FILE *outf
,
340 for (eq
= 0; eq
< length
; eq
++)
342 enum data_dependence_direction dir
= ((enum data_dependence_direction
)
348 fprintf (outf
, " +");
351 fprintf (outf
, " -");
354 fprintf (outf
, " =");
356 case dir_positive_or_equal
:
357 fprintf (outf
, " +=");
359 case dir_positive_or_negative
:
360 fprintf (outf
, " +-");
362 case dir_negative_or_equal
:
363 fprintf (outf
, " -=");
366 fprintf (outf
, " *");
369 fprintf (outf
, "indep");
373 fprintf (outf
, "\n");
376 /* Print a vector of direction vectors. */
379 print_dir_vectors (FILE *outf
, vec
<lambda_vector
> dir_vects
,
385 FOR_EACH_VEC_ELT (dir_vects
, j
, v
)
386 print_direction_vector (outf
, v
, length
);
389 /* Print out a vector VEC of length N to OUTFILE. */
392 print_lambda_vector (FILE * outfile
, lambda_vector vector
, int n
)
396 for (i
= 0; i
< n
; i
++)
397 fprintf (outfile
, "%3d ", vector
[i
]);
398 fprintf (outfile
, "\n");
401 /* Print a vector of distance vectors. */
404 print_dist_vectors (FILE *outf
, vec
<lambda_vector
> dist_vects
,
410 FOR_EACH_VEC_ELT (dist_vects
, j
, v
)
411 print_lambda_vector (outf
, v
, length
);
414 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
417 dump_data_dependence_relation (FILE *outf
,
418 struct data_dependence_relation
*ddr
)
420 struct data_reference
*dra
, *drb
;
422 fprintf (outf
, "(Data Dep: \n");
424 if (!ddr
|| DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
431 dump_data_reference (outf
, dra
);
433 fprintf (outf
, " (nil)\n");
435 dump_data_reference (outf
, drb
);
437 fprintf (outf
, " (nil)\n");
439 fprintf (outf
, " (don't know)\n)\n");
445 dump_data_reference (outf
, dra
);
446 dump_data_reference (outf
, drb
);
448 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
449 fprintf (outf
, " (no dependence)\n");
451 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
456 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
458 fprintf (outf
, " access_fn_A: ");
459 print_generic_stmt (outf
, DR_ACCESS_FN (dra
, i
), 0);
460 fprintf (outf
, " access_fn_B: ");
461 print_generic_stmt (outf
, DR_ACCESS_FN (drb
, i
), 0);
462 dump_subscript (outf
, DDR_SUBSCRIPT (ddr
, i
));
465 fprintf (outf
, " inner loop index: %d\n", DDR_INNER_LOOP (ddr
));
466 fprintf (outf
, " loop nest: (");
467 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr
), i
, loopi
)
468 fprintf (outf
, "%d ", loopi
->num
);
469 fprintf (outf
, ")\n");
471 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
473 fprintf (outf
, " distance_vector: ");
474 print_lambda_vector (outf
, DDR_DIST_VECT (ddr
, i
),
478 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
480 fprintf (outf
, " direction_vector: ");
481 print_direction_vector (outf
, DDR_DIR_VECT (ddr
, i
),
486 fprintf (outf
, ")\n");
492 debug_data_dependence_relation (struct data_dependence_relation
*ddr
)
494 dump_data_dependence_relation (stderr
, ddr
);
497 /* Dump into FILE all the dependence relations from DDRS. */
500 dump_data_dependence_relations (FILE *file
,
504 struct data_dependence_relation
*ddr
;
506 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
507 dump_data_dependence_relation (file
, ddr
);
511 debug (vec
<ddr_p
> &ref
)
513 dump_data_dependence_relations (stderr
, ref
);
517 debug (vec
<ddr_p
> *ptr
)
522 fprintf (stderr
, "<nil>\n");
526 /* Dump to STDERR all the dependence relations from DDRS. */
529 debug_data_dependence_relations (vec
<ddr_p
> ddrs
)
531 dump_data_dependence_relations (stderr
, ddrs
);
534 /* Dumps the distance and direction vectors in FILE. DDRS contains
535 the dependence relations, and VECT_SIZE is the size of the
536 dependence vectors, or in other words the number of loops in the
540 dump_dist_dir_vectors (FILE *file
, vec
<ddr_p
> ddrs
)
543 struct data_dependence_relation
*ddr
;
546 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
547 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
&& DDR_AFFINE_P (ddr
))
549 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), j
, v
)
551 fprintf (file
, "DISTANCE_V (");
552 print_lambda_vector (file
, v
, DDR_NB_LOOPS (ddr
));
553 fprintf (file
, ")\n");
556 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), j
, v
)
558 fprintf (file
, "DIRECTION_V (");
559 print_direction_vector (file
, v
, DDR_NB_LOOPS (ddr
));
560 fprintf (file
, ")\n");
564 fprintf (file
, "\n\n");
567 /* Dumps the data dependence relations DDRS in FILE. */
570 dump_ddrs (FILE *file
, vec
<ddr_p
> ddrs
)
573 struct data_dependence_relation
*ddr
;
575 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
576 dump_data_dependence_relation (file
, ddr
);
578 fprintf (file
, "\n\n");
582 debug_ddrs (vec
<ddr_p
> ddrs
)
584 dump_ddrs (stderr
, ddrs
);
587 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
588 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
589 constant of type ssizetype, and returns true. If we cannot do this
590 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
594 split_constant_offset_1 (tree type
, tree op0
, enum tree_code code
, tree op1
,
595 tree
*var
, tree
*off
)
599 enum tree_code ocode
= code
;
607 *var
= build_int_cst (type
, 0);
608 *off
= fold_convert (ssizetype
, op0
);
611 case POINTER_PLUS_EXPR
:
616 split_constant_offset (op0
, &var0
, &off0
);
617 split_constant_offset (op1
, &var1
, &off1
);
618 *var
= fold_build2 (code
, type
, var0
, var1
);
619 *off
= size_binop (ocode
, off0
, off1
);
623 if (TREE_CODE (op1
) != INTEGER_CST
)
626 split_constant_offset (op0
, &var0
, &off0
);
627 *var
= fold_build2 (MULT_EXPR
, type
, var0
, op1
);
628 *off
= size_binop (MULT_EXPR
, off0
, fold_convert (ssizetype
, op1
));
634 HOST_WIDE_INT pbitsize
, pbitpos
;
636 int punsignedp
, pvolatilep
;
638 op0
= TREE_OPERAND (op0
, 0);
639 base
= get_inner_reference (op0
, &pbitsize
, &pbitpos
, &poffset
,
640 &pmode
, &punsignedp
, &pvolatilep
, false);
642 if (pbitpos
% BITS_PER_UNIT
!= 0)
644 base
= build_fold_addr_expr (base
);
645 off0
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
649 split_constant_offset (poffset
, &poffset
, &off1
);
650 off0
= size_binop (PLUS_EXPR
, off0
, off1
);
651 if (POINTER_TYPE_P (TREE_TYPE (base
)))
652 base
= fold_build_pointer_plus (base
, poffset
);
654 base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base
), base
,
655 fold_convert (TREE_TYPE (base
), poffset
));
658 var0
= fold_convert (type
, base
);
660 /* If variable length types are involved, punt, otherwise casts
661 might be converted into ARRAY_REFs in gimplify_conversion.
662 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
663 possibly no longer appears in current GIMPLE, might resurface.
664 This perhaps could run
665 if (CONVERT_EXPR_P (var0))
667 gimplify_conversion (&var0);
668 // Attempt to fill in any within var0 found ARRAY_REF's
669 // element size from corresponding op embedded ARRAY_REF,
670 // if unsuccessful, just punt.
672 while (POINTER_TYPE_P (type
))
673 type
= TREE_TYPE (type
);
674 if (int_size_in_bytes (type
) < 0)
684 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0
))
687 gimple def_stmt
= SSA_NAME_DEF_STMT (op0
);
688 enum tree_code subcode
;
690 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
693 var0
= gimple_assign_rhs1 (def_stmt
);
694 subcode
= gimple_assign_rhs_code (def_stmt
);
695 var1
= gimple_assign_rhs2 (def_stmt
);
697 return split_constant_offset_1 (type
, var0
, subcode
, var1
, var
, off
);
701 /* We must not introduce undefined overflow, and we must not change the value.
702 Hence we're okay if the inner type doesn't overflow to start with
703 (pointer or signed), the outer type also is an integer or pointer
704 and the outer precision is at least as large as the inner. */
705 tree itype
= TREE_TYPE (op0
);
706 if ((POINTER_TYPE_P (itype
)
707 || (INTEGRAL_TYPE_P (itype
) && TYPE_OVERFLOW_UNDEFINED (itype
)))
708 && TYPE_PRECISION (type
) >= TYPE_PRECISION (itype
)
709 && (POINTER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)))
711 split_constant_offset (op0
, &var0
, off
);
712 *var
= fold_convert (type
, var0
);
723 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
724 will be ssizetype. */
727 split_constant_offset (tree exp
, tree
*var
, tree
*off
)
729 tree type
= TREE_TYPE (exp
), otype
, op0
, op1
, e
, o
;
733 *off
= ssize_int (0);
736 if (tree_is_chrec (exp
)
737 || get_gimple_rhs_class (TREE_CODE (exp
)) == GIMPLE_TERNARY_RHS
)
740 otype
= TREE_TYPE (exp
);
741 code
= TREE_CODE (exp
);
742 extract_ops_from_tree (exp
, &code
, &op0
, &op1
);
743 if (split_constant_offset_1 (otype
, op0
, code
, op1
, &e
, &o
))
745 *var
= fold_convert (type
, e
);
750 /* Returns the address ADDR of an object in a canonical shape (without nop
751 casts, and with type of pointer to the object). */
754 canonicalize_base_object_address (tree addr
)
760 /* The base address may be obtained by casting from integer, in that case
762 if (!POINTER_TYPE_P (TREE_TYPE (addr
)))
765 if (TREE_CODE (addr
) != ADDR_EXPR
)
768 return build_fold_addr_expr (TREE_OPERAND (addr
, 0));
771 /* Analyzes the behavior of the memory reference DR in the innermost loop or
772 basic block that contains it. Returns true if analysis succeed or false
776 dr_analyze_innermost (struct data_reference
*dr
, struct loop
*nest
)
778 gimple stmt
= DR_STMT (dr
);
779 struct loop
*loop
= loop_containing_stmt (stmt
);
780 tree ref
= DR_REF (dr
);
781 HOST_WIDE_INT pbitsize
, pbitpos
;
784 int punsignedp
, pvolatilep
;
785 affine_iv base_iv
, offset_iv
;
786 tree init
, dinit
, step
;
787 bool in_loop
= (loop
&& loop
->num
);
789 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
790 fprintf (dump_file
, "analyze_innermost: ");
792 base
= get_inner_reference (ref
, &pbitsize
, &pbitpos
, &poffset
,
793 &pmode
, &punsignedp
, &pvolatilep
, false);
794 gcc_assert (base
!= NULL_TREE
);
796 if (pbitpos
% BITS_PER_UNIT
!= 0)
798 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
799 fprintf (dump_file
, "failed: bit offset alignment.\n");
803 if (TREE_CODE (base
) == MEM_REF
)
805 if (!integer_zerop (TREE_OPERAND (base
, 1)))
807 offset_int moff
= mem_ref_offset (base
);
808 tree mofft
= wide_int_to_tree (sizetype
, moff
);
812 poffset
= size_binop (PLUS_EXPR
, poffset
, mofft
);
814 base
= TREE_OPERAND (base
, 0);
817 base
= build_fold_addr_expr (base
);
821 if (!simple_iv (loop
, loop_containing_stmt (stmt
), base
, &base_iv
,
822 nest
? true : false))
826 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
827 fprintf (dump_file
, "failed: evolution of base is not"
834 base_iv
.step
= ssize_int (0);
835 base_iv
.no_overflow
= true;
842 base_iv
.step
= ssize_int (0);
843 base_iv
.no_overflow
= true;
848 offset_iv
.base
= ssize_int (0);
849 offset_iv
.step
= ssize_int (0);
855 offset_iv
.base
= poffset
;
856 offset_iv
.step
= ssize_int (0);
858 else if (!simple_iv (loop
, loop_containing_stmt (stmt
),
860 nest
? true : false))
864 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
865 fprintf (dump_file
, "failed: evolution of offset is not"
871 offset_iv
.base
= poffset
;
872 offset_iv
.step
= ssize_int (0);
877 init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
878 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
879 init
= size_binop (PLUS_EXPR
, init
, dinit
);
880 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
881 init
= size_binop (PLUS_EXPR
, init
, dinit
);
883 step
= size_binop (PLUS_EXPR
,
884 fold_convert (ssizetype
, base_iv
.step
),
885 fold_convert (ssizetype
, offset_iv
.step
));
887 DR_BASE_ADDRESS (dr
) = canonicalize_base_object_address (base_iv
.base
);
889 DR_OFFSET (dr
) = fold_convert (ssizetype
, offset_iv
.base
);
893 DR_ALIGNED_TO (dr
) = size_int (highest_pow2_factor (offset_iv
.base
));
895 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
896 fprintf (dump_file
, "success.\n");
901 /* Determines the base object and the list of indices of memory reference
902 DR, analyzed in LOOP and instantiated in loop nest NEST. */
905 dr_analyze_indices (struct data_reference
*dr
, loop_p nest
, loop_p loop
)
907 vec
<tree
> access_fns
= vNULL
;
909 tree base
, off
, access_fn
;
910 basic_block before_loop
;
912 /* If analyzing a basic-block there are no indices to analyze
913 and thus no access functions. */
916 DR_BASE_OBJECT (dr
) = DR_REF (dr
);
917 DR_ACCESS_FNS (dr
).create (0);
922 before_loop
= block_before_loop (nest
);
924 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
925 into a two element array with a constant index. The base is
926 then just the immediate underlying object. */
927 if (TREE_CODE (ref
) == REALPART_EXPR
)
929 ref
= TREE_OPERAND (ref
, 0);
930 access_fns
.safe_push (integer_zero_node
);
932 else if (TREE_CODE (ref
) == IMAGPART_EXPR
)
934 ref
= TREE_OPERAND (ref
, 0);
935 access_fns
.safe_push (integer_one_node
);
938 /* Analyze access functions of dimensions we know to be independent. */
939 while (handled_component_p (ref
))
941 if (TREE_CODE (ref
) == ARRAY_REF
)
943 op
= TREE_OPERAND (ref
, 1);
944 access_fn
= analyze_scalar_evolution (loop
, op
);
945 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
946 access_fns
.safe_push (access_fn
);
948 else if (TREE_CODE (ref
) == COMPONENT_REF
949 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref
, 0))) == RECORD_TYPE
)
951 /* For COMPONENT_REFs of records (but not unions!) use the
952 FIELD_DECL offset as constant access function so we can
953 disambiguate a[i].f1 and a[i].f2. */
954 tree off
= component_ref_field_offset (ref
);
955 off
= size_binop (PLUS_EXPR
,
956 size_binop (MULT_EXPR
,
957 fold_convert (bitsizetype
, off
),
958 bitsize_int (BITS_PER_UNIT
)),
959 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref
, 1)));
960 access_fns
.safe_push (off
);
963 /* If we have an unhandled component we could not translate
964 to an access function stop analyzing. We have determined
965 our base object in this case. */
968 ref
= TREE_OPERAND (ref
, 0);
971 /* If the address operand of a MEM_REF base has an evolution in the
972 analyzed nest, add it as an additional independent access-function. */
973 if (TREE_CODE (ref
) == MEM_REF
)
975 op
= TREE_OPERAND (ref
, 0);
976 access_fn
= analyze_scalar_evolution (loop
, op
);
977 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
978 if (TREE_CODE (access_fn
) == POLYNOMIAL_CHREC
)
981 tree memoff
= TREE_OPERAND (ref
, 1);
982 base
= initial_condition (access_fn
);
983 orig_type
= TREE_TYPE (base
);
984 STRIP_USELESS_TYPE_CONVERSION (base
);
985 split_constant_offset (base
, &base
, &off
);
986 STRIP_USELESS_TYPE_CONVERSION (base
);
987 /* Fold the MEM_REF offset into the evolutions initial
988 value to make more bases comparable. */
989 if (!integer_zerop (memoff
))
991 off
= size_binop (PLUS_EXPR
, off
,
992 fold_convert (ssizetype
, memoff
));
993 memoff
= build_int_cst (TREE_TYPE (memoff
), 0);
995 access_fn
= chrec_replace_initial_condition
996 (access_fn
, fold_convert (orig_type
, off
));
997 /* ??? This is still not a suitable base object for
998 dr_may_alias_p - the base object needs to be an
999 access that covers the object as whole. With
1000 an evolution in the pointer this cannot be
1002 As a band-aid, mark the access so we can special-case
1003 it in dr_may_alias_p. */
1005 ref
= fold_build2_loc (EXPR_LOCATION (ref
),
1006 MEM_REF
, TREE_TYPE (ref
),
1008 MR_DEPENDENCE_CLIQUE (ref
) = MR_DEPENDENCE_CLIQUE (old
);
1009 MR_DEPENDENCE_BASE (ref
) = MR_DEPENDENCE_BASE (old
);
1010 access_fns
.safe_push (access_fn
);
1013 else if (DECL_P (ref
))
1015 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1016 ref
= build2 (MEM_REF
, TREE_TYPE (ref
),
1017 build_fold_addr_expr (ref
),
1018 build_int_cst (reference_alias_ptr_type (ref
), 0));
1021 DR_BASE_OBJECT (dr
) = ref
;
1022 DR_ACCESS_FNS (dr
) = access_fns
;
1025 /* Extracts the alias analysis information from the memory reference DR. */
1028 dr_analyze_alias (struct data_reference
*dr
)
1030 tree ref
= DR_REF (dr
);
1031 tree base
= get_base_address (ref
), addr
;
1033 if (INDIRECT_REF_P (base
)
1034 || TREE_CODE (base
) == MEM_REF
)
1036 addr
= TREE_OPERAND (base
, 0);
1037 if (TREE_CODE (addr
) == SSA_NAME
)
1038 DR_PTR_INFO (dr
) = SSA_NAME_PTR_INFO (addr
);
1042 /* Frees data reference DR. */
1045 free_data_ref (data_reference_p dr
)
1047 DR_ACCESS_FNS (dr
).release ();
1051 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1052 is read if IS_READ is true, write otherwise. Returns the
1053 data_reference description of MEMREF. NEST is the outermost loop
1054 in which the reference should be instantiated, LOOP is the loop in
1055 which the data reference should be analyzed. */
1057 struct data_reference
*
1058 create_data_ref (loop_p nest
, loop_p loop
, tree memref
, gimple stmt
,
1061 struct data_reference
*dr
;
1063 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1065 fprintf (dump_file
, "Creating dr for ");
1066 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
1067 fprintf (dump_file
, "\n");
1070 dr
= XCNEW (struct data_reference
);
1071 DR_STMT (dr
) = stmt
;
1072 DR_REF (dr
) = memref
;
1073 DR_IS_READ (dr
) = is_read
;
1075 dr_analyze_innermost (dr
, nest
);
1076 dr_analyze_indices (dr
, nest
, loop
);
1077 dr_analyze_alias (dr
);
1079 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1082 fprintf (dump_file
, "\tbase_address: ");
1083 print_generic_expr (dump_file
, DR_BASE_ADDRESS (dr
), TDF_SLIM
);
1084 fprintf (dump_file
, "\n\toffset from base address: ");
1085 print_generic_expr (dump_file
, DR_OFFSET (dr
), TDF_SLIM
);
1086 fprintf (dump_file
, "\n\tconstant offset from base address: ");
1087 print_generic_expr (dump_file
, DR_INIT (dr
), TDF_SLIM
);
1088 fprintf (dump_file
, "\n\tstep: ");
1089 print_generic_expr (dump_file
, DR_STEP (dr
), TDF_SLIM
);
1090 fprintf (dump_file
, "\n\taligned to: ");
1091 print_generic_expr (dump_file
, DR_ALIGNED_TO (dr
), TDF_SLIM
);
1092 fprintf (dump_file
, "\n\tbase_object: ");
1093 print_generic_expr (dump_file
, DR_BASE_OBJECT (dr
), TDF_SLIM
);
1094 fprintf (dump_file
, "\n");
1095 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
1097 fprintf (dump_file
, "\tAccess function %d: ", i
);
1098 print_generic_stmt (dump_file
, DR_ACCESS_FN (dr
, i
), TDF_SLIM
);
1105 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1108 dr_equal_offsets_p1 (tree offset1
, tree offset2
)
1112 STRIP_NOPS (offset1
);
1113 STRIP_NOPS (offset2
);
1115 if (offset1
== offset2
)
1118 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
1119 || (!BINARY_CLASS_P (offset1
) && !UNARY_CLASS_P (offset1
)))
1122 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 0),
1123 TREE_OPERAND (offset2
, 0));
1125 if (!res
|| !BINARY_CLASS_P (offset1
))
1128 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 1),
1129 TREE_OPERAND (offset2
, 1));
1134 /* Check if DRA and DRB have equal offsets. */
1136 dr_equal_offsets_p (struct data_reference
*dra
,
1137 struct data_reference
*drb
)
1139 tree offset1
, offset2
;
1141 offset1
= DR_OFFSET (dra
);
1142 offset2
= DR_OFFSET (drb
);
1144 return dr_equal_offsets_p1 (offset1
, offset2
);
1147 /* Returns true if FNA == FNB. */
1150 affine_function_equal_p (affine_fn fna
, affine_fn fnb
)
1152 unsigned i
, n
= fna
.length ();
1154 if (n
!= fnb
.length ())
1157 for (i
= 0; i
< n
; i
++)
1158 if (!operand_equal_p (fna
[i
], fnb
[i
], 0))
1164 /* If all the functions in CF are the same, returns one of them,
1165 otherwise returns NULL. */
1168 common_affine_function (conflict_function
*cf
)
1173 if (!CF_NONTRIVIAL_P (cf
))
1174 return affine_fn ();
1178 for (i
= 1; i
< cf
->n
; i
++)
1179 if (!affine_function_equal_p (comm
, cf
->fns
[i
]))
1180 return affine_fn ();
1185 /* Returns the base of the affine function FN. */
1188 affine_function_base (affine_fn fn
)
1193 /* Returns true if FN is a constant. */
1196 affine_function_constant_p (affine_fn fn
)
1201 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
1202 if (!integer_zerop (coef
))
1208 /* Returns true if FN is the zero constant function. */
1211 affine_function_zero_p (affine_fn fn
)
1213 return (integer_zerop (affine_function_base (fn
))
1214 && affine_function_constant_p (fn
));
1217 /* Returns a signed integer type with the largest precision from TA
1221 signed_type_for_types (tree ta
, tree tb
)
1223 if (TYPE_PRECISION (ta
) > TYPE_PRECISION (tb
))
1224 return signed_type_for (ta
);
1226 return signed_type_for (tb
);
1229 /* Applies operation OP on affine functions FNA and FNB, and returns the
1233 affine_fn_op (enum tree_code op
, affine_fn fna
, affine_fn fnb
)
1239 if (fnb
.length () > fna
.length ())
1251 for (i
= 0; i
< n
; i
++)
1253 tree type
= signed_type_for_types (TREE_TYPE (fna
[i
]),
1254 TREE_TYPE (fnb
[i
]));
1255 ret
.quick_push (fold_build2 (op
, type
, fna
[i
], fnb
[i
]));
1258 for (; fna
.iterate (i
, &coef
); i
++)
1259 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1260 coef
, integer_zero_node
));
1261 for (; fnb
.iterate (i
, &coef
); i
++)
1262 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1263 integer_zero_node
, coef
));
1268 /* Returns the sum of affine functions FNA and FNB. */
1271 affine_fn_plus (affine_fn fna
, affine_fn fnb
)
1273 return affine_fn_op (PLUS_EXPR
, fna
, fnb
);
1276 /* Returns the difference of affine functions FNA and FNB. */
1279 affine_fn_minus (affine_fn fna
, affine_fn fnb
)
1281 return affine_fn_op (MINUS_EXPR
, fna
, fnb
);
1284 /* Frees affine function FN. */
1287 affine_fn_free (affine_fn fn
)
1292 /* Determine for each subscript in the data dependence relation DDR
1296 compute_subscript_distance (struct data_dependence_relation
*ddr
)
1298 conflict_function
*cf_a
, *cf_b
;
1299 affine_fn fn_a
, fn_b
, diff
;
1301 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1305 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
1307 struct subscript
*subscript
;
1309 subscript
= DDR_SUBSCRIPT (ddr
, i
);
1310 cf_a
= SUB_CONFLICTS_IN_A (subscript
);
1311 cf_b
= SUB_CONFLICTS_IN_B (subscript
);
1313 fn_a
= common_affine_function (cf_a
);
1314 fn_b
= common_affine_function (cf_b
);
1315 if (!fn_a
.exists () || !fn_b
.exists ())
1317 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1320 diff
= affine_fn_minus (fn_a
, fn_b
);
1322 if (affine_function_constant_p (diff
))
1323 SUB_DISTANCE (subscript
) = affine_function_base (diff
);
1325 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1327 affine_fn_free (diff
);
1332 /* Returns the conflict function for "unknown". */
1334 static conflict_function
*
1335 conflict_fn_not_known (void)
1337 conflict_function
*fn
= XCNEW (conflict_function
);
1343 /* Returns the conflict function for "independent". */
1345 static conflict_function
*
1346 conflict_fn_no_dependence (void)
1348 conflict_function
*fn
= XCNEW (conflict_function
);
1349 fn
->n
= NO_DEPENDENCE
;
1354 /* Returns true if the address of OBJ is invariant in LOOP. */
1357 object_address_invariant_in_loop_p (const struct loop
*loop
, const_tree obj
)
1359 while (handled_component_p (obj
))
1361 if (TREE_CODE (obj
) == ARRAY_REF
)
1363 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1364 need to check the stride and the lower bound of the reference. */
1365 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1367 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 3),
1371 else if (TREE_CODE (obj
) == COMPONENT_REF
)
1373 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1377 obj
= TREE_OPERAND (obj
, 0);
1380 if (!INDIRECT_REF_P (obj
)
1381 && TREE_CODE (obj
) != MEM_REF
)
1384 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 0),
1388 /* Returns false if we can prove that data references A and B do not alias,
1389 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1393 dr_may_alias_p (const struct data_reference
*a
, const struct data_reference
*b
,
1396 tree addr_a
= DR_BASE_OBJECT (a
);
1397 tree addr_b
= DR_BASE_OBJECT (b
);
1399 /* If we are not processing a loop nest but scalar code we
1400 do not need to care about possible cross-iteration dependences
1401 and thus can process the full original reference. Do so,
1402 similar to how loop invariant motion applies extra offset-based
1406 aff_tree off1
, off2
;
1407 widest_int size1
, size2
;
1408 get_inner_reference_aff (DR_REF (a
), &off1
, &size1
);
1409 get_inner_reference_aff (DR_REF (b
), &off2
, &size2
);
1410 aff_combination_scale (&off1
, -1);
1411 aff_combination_add (&off2
, &off1
);
1412 if (aff_comb_cannot_overlap_p (&off2
, size1
, size2
))
1416 if ((TREE_CODE (addr_a
) == MEM_REF
|| TREE_CODE (addr_a
) == TARGET_MEM_REF
)
1417 && (TREE_CODE (addr_b
) == MEM_REF
|| TREE_CODE (addr_b
) == TARGET_MEM_REF
)
1418 && MR_DEPENDENCE_CLIQUE (addr_a
) == MR_DEPENDENCE_CLIQUE (addr_b
)
1419 && MR_DEPENDENCE_BASE (addr_a
) != MR_DEPENDENCE_BASE (addr_b
))
1422 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
1423 do not know the size of the base-object. So we cannot do any
1424 offset/overlap based analysis but have to rely on points-to
1425 information only. */
1426 if (TREE_CODE (addr_a
) == MEM_REF
1427 && TREE_CODE (TREE_OPERAND (addr_a
, 0)) == SSA_NAME
)
1429 /* For true dependences we can apply TBAA. */
1430 if (flag_strict_aliasing
1431 && DR_IS_WRITE (a
) && DR_IS_READ (b
)
1432 && !alias_sets_conflict_p (get_alias_set (DR_REF (a
)),
1433 get_alias_set (DR_REF (b
))))
1435 if (TREE_CODE (addr_b
) == MEM_REF
)
1436 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
1437 TREE_OPERAND (addr_b
, 0));
1439 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
1440 build_fold_addr_expr (addr_b
));
1442 else if (TREE_CODE (addr_b
) == MEM_REF
1443 && TREE_CODE (TREE_OPERAND (addr_b
, 0)) == SSA_NAME
)
1445 /* For true dependences we can apply TBAA. */
1446 if (flag_strict_aliasing
1447 && DR_IS_WRITE (a
) && DR_IS_READ (b
)
1448 && !alias_sets_conflict_p (get_alias_set (DR_REF (a
)),
1449 get_alias_set (DR_REF (b
))))
1451 if (TREE_CODE (addr_a
) == MEM_REF
)
1452 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
1453 TREE_OPERAND (addr_b
, 0));
1455 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a
),
1456 TREE_OPERAND (addr_b
, 0));
1459 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1460 that is being subsetted in the loop nest. */
1461 if (DR_IS_WRITE (a
) && DR_IS_WRITE (b
))
1462 return refs_output_dependent_p (addr_a
, addr_b
);
1463 else if (DR_IS_READ (a
) && DR_IS_WRITE (b
))
1464 return refs_anti_dependent_p (addr_a
, addr_b
);
1465 return refs_may_alias_p (addr_a
, addr_b
);
1468 /* Initialize a data dependence relation between data accesses A and
1469 B. NB_LOOPS is the number of loops surrounding the references: the
1470 size of the classic distance/direction vectors. */
1472 struct data_dependence_relation
*
1473 initialize_data_dependence_relation (struct data_reference
*a
,
1474 struct data_reference
*b
,
1475 vec
<loop_p
> loop_nest
)
1477 struct data_dependence_relation
*res
;
1480 res
= XNEW (struct data_dependence_relation
);
1483 DDR_LOOP_NEST (res
).create (0);
1484 DDR_REVERSED_P (res
) = false;
1485 DDR_SUBSCRIPTS (res
).create (0);
1486 DDR_DIR_VECTS (res
).create (0);
1487 DDR_DIST_VECTS (res
).create (0);
1489 if (a
== NULL
|| b
== NULL
)
1491 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1495 /* If the data references do not alias, then they are independent. */
1496 if (!dr_may_alias_p (a
, b
, loop_nest
.exists ()))
1498 DDR_ARE_DEPENDENT (res
) = chrec_known
;
1502 /* The case where the references are exactly the same. */
1503 if (operand_equal_p (DR_REF (a
), DR_REF (b
), 0))
1505 if (loop_nest
.exists ()
1506 && !object_address_invariant_in_loop_p (loop_nest
[0],
1507 DR_BASE_OBJECT (a
)))
1509 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1512 DDR_AFFINE_P (res
) = true;
1513 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1514 DDR_SUBSCRIPTS (res
).create (DR_NUM_DIMENSIONS (a
));
1515 DDR_LOOP_NEST (res
) = loop_nest
;
1516 DDR_INNER_LOOP (res
) = 0;
1517 DDR_SELF_REFERENCE (res
) = true;
1518 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1520 struct subscript
*subscript
;
1522 subscript
= XNEW (struct subscript
);
1523 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1524 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1525 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1526 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1527 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
1532 /* If the references do not access the same object, we do not know
1533 whether they alias or not. */
1534 if (!operand_equal_p (DR_BASE_OBJECT (a
), DR_BASE_OBJECT (b
), 0))
1536 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1540 /* If the base of the object is not invariant in the loop nest, we cannot
1541 analyze it. TODO -- in fact, it would suffice to record that there may
1542 be arbitrary dependences in the loops where the base object varies. */
1543 if (loop_nest
.exists ()
1544 && !object_address_invariant_in_loop_p (loop_nest
[0],
1545 DR_BASE_OBJECT (a
)))
1547 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1551 /* If the number of dimensions of the access to not agree we can have
1552 a pointer access to a component of the array element type and an
1553 array access while the base-objects are still the same. Punt. */
1554 if (DR_NUM_DIMENSIONS (a
) != DR_NUM_DIMENSIONS (b
))
1556 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1560 DDR_AFFINE_P (res
) = true;
1561 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1562 DDR_SUBSCRIPTS (res
).create (DR_NUM_DIMENSIONS (a
));
1563 DDR_LOOP_NEST (res
) = loop_nest
;
1564 DDR_INNER_LOOP (res
) = 0;
1565 DDR_SELF_REFERENCE (res
) = false;
1567 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1569 struct subscript
*subscript
;
1571 subscript
= XNEW (struct subscript
);
1572 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1573 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1574 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1575 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1576 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
1582 /* Frees memory used by the conflict function F. */
1585 free_conflict_function (conflict_function
*f
)
1589 if (CF_NONTRIVIAL_P (f
))
1591 for (i
= 0; i
< f
->n
; i
++)
1592 affine_fn_free (f
->fns
[i
]);
1597 /* Frees memory used by SUBSCRIPTS. */
1600 free_subscripts (vec
<subscript_p
> subscripts
)
1605 FOR_EACH_VEC_ELT (subscripts
, i
, s
)
1607 free_conflict_function (s
->conflicting_iterations_in_a
);
1608 free_conflict_function (s
->conflicting_iterations_in_b
);
1611 subscripts
.release ();
1614 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1618 finalize_ddr_dependent (struct data_dependence_relation
*ddr
,
1621 DDR_ARE_DEPENDENT (ddr
) = chrec
;
1622 free_subscripts (DDR_SUBSCRIPTS (ddr
));
1623 DDR_SUBSCRIPTS (ddr
).create (0);
1626 /* The dependence relation DDR cannot be represented by a distance
1630 non_affine_dependence_relation (struct data_dependence_relation
*ddr
)
1632 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1633 fprintf (dump_file
, "(Dependence relation cannot be represented by distance vector.) \n");
1635 DDR_AFFINE_P (ddr
) = false;
1640 /* This section contains the classic Banerjee tests. */
1642 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1643 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1646 ziv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1648 return (evolution_function_is_constant_p (chrec_a
)
1649 && evolution_function_is_constant_p (chrec_b
));
1652 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1653 variable, i.e., if the SIV (Single Index Variable) test is true. */
1656 siv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1658 if ((evolution_function_is_constant_p (chrec_a
)
1659 && evolution_function_is_univariate_p (chrec_b
))
1660 || (evolution_function_is_constant_p (chrec_b
)
1661 && evolution_function_is_univariate_p (chrec_a
)))
1664 if (evolution_function_is_univariate_p (chrec_a
)
1665 && evolution_function_is_univariate_p (chrec_b
))
1667 switch (TREE_CODE (chrec_a
))
1669 case POLYNOMIAL_CHREC
:
1670 switch (TREE_CODE (chrec_b
))
1672 case POLYNOMIAL_CHREC
:
1673 if (CHREC_VARIABLE (chrec_a
) != CHREC_VARIABLE (chrec_b
))
1688 /* Creates a conflict function with N dimensions. The affine functions
1689 in each dimension follow. */
1691 static conflict_function
*
1692 conflict_fn (unsigned n
, ...)
1695 conflict_function
*ret
= XCNEW (conflict_function
);
1698 gcc_assert (0 < n
&& n
<= MAX_DIM
);
1702 for (i
= 0; i
< n
; i
++)
1703 ret
->fns
[i
] = va_arg (ap
, affine_fn
);
1709 /* Returns constant affine function with value CST. */
1712 affine_fn_cst (tree cst
)
1716 fn
.quick_push (cst
);
1720 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1723 affine_fn_univar (tree cst
, unsigned dim
, tree coef
)
1726 fn
.create (dim
+ 1);
1729 gcc_assert (dim
> 0);
1730 fn
.quick_push (cst
);
1731 for (i
= 1; i
< dim
; i
++)
1732 fn
.quick_push (integer_zero_node
);
1733 fn
.quick_push (coef
);
1737 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1738 *OVERLAPS_B are initialized to the functions that describe the
1739 relation between the elements accessed twice by CHREC_A and
1740 CHREC_B. For k >= 0, the following property is verified:
1742 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1745 analyze_ziv_subscript (tree chrec_a
,
1747 conflict_function
**overlaps_a
,
1748 conflict_function
**overlaps_b
,
1749 tree
*last_conflicts
)
1751 tree type
, difference
;
1752 dependence_stats
.num_ziv
++;
1754 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1755 fprintf (dump_file
, "(analyze_ziv_subscript \n");
1757 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1758 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1759 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1760 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
1762 switch (TREE_CODE (difference
))
1765 if (integer_zerop (difference
))
1767 /* The difference is equal to zero: the accessed index
1768 overlaps for each iteration in the loop. */
1769 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1770 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1771 *last_conflicts
= chrec_dont_know
;
1772 dependence_stats
.num_ziv_dependent
++;
1776 /* The accesses do not overlap. */
1777 *overlaps_a
= conflict_fn_no_dependence ();
1778 *overlaps_b
= conflict_fn_no_dependence ();
1779 *last_conflicts
= integer_zero_node
;
1780 dependence_stats
.num_ziv_independent
++;
1785 /* We're not sure whether the indexes overlap. For the moment,
1786 conservatively answer "don't know". */
1787 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1788 fprintf (dump_file
, "ziv test failed: difference is non-integer.\n");
1790 *overlaps_a
= conflict_fn_not_known ();
1791 *overlaps_b
= conflict_fn_not_known ();
1792 *last_conflicts
= chrec_dont_know
;
1793 dependence_stats
.num_ziv_unimplemented
++;
1797 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1798 fprintf (dump_file
, ")\n");
1801 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1802 and only if it fits to the int type. If this is not the case, or the
1803 bound on the number of iterations of LOOP could not be derived, returns
1807 max_stmt_executions_tree (struct loop
*loop
)
1811 if (!max_stmt_executions (loop
, &nit
))
1812 return chrec_dont_know
;
1814 if (!wi::fits_to_tree_p (nit
, unsigned_type_node
))
1815 return chrec_dont_know
;
1817 return wide_int_to_tree (unsigned_type_node
, nit
);
1820 /* Determine whether the CHREC is always positive/negative. If the expression
1821 cannot be statically analyzed, return false, otherwise set the answer into
1825 chrec_is_positive (tree chrec
, bool *value
)
1827 bool value0
, value1
, value2
;
1828 tree end_value
, nb_iter
;
1830 switch (TREE_CODE (chrec
))
1832 case POLYNOMIAL_CHREC
:
1833 if (!chrec_is_positive (CHREC_LEFT (chrec
), &value0
)
1834 || !chrec_is_positive (CHREC_RIGHT (chrec
), &value1
))
1837 /* FIXME -- overflows. */
1838 if (value0
== value1
)
1844 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1845 and the proof consists in showing that the sign never
1846 changes during the execution of the loop, from 0 to
1847 loop->nb_iterations. */
1848 if (!evolution_function_is_affine_p (chrec
))
1851 nb_iter
= number_of_latch_executions (get_chrec_loop (chrec
));
1852 if (chrec_contains_undetermined (nb_iter
))
1856 /* TODO -- If the test is after the exit, we may decrease the number of
1857 iterations by one. */
1859 nb_iter
= chrec_fold_minus (type
, nb_iter
, build_int_cst (type
, 1));
1862 end_value
= chrec_apply (CHREC_VARIABLE (chrec
), chrec
, nb_iter
);
1864 if (!chrec_is_positive (end_value
, &value2
))
1868 return value0
== value1
;
1871 switch (tree_int_cst_sgn (chrec
))
1890 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1891 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1892 *OVERLAPS_B are initialized to the functions that describe the
1893 relation between the elements accessed twice by CHREC_A and
1894 CHREC_B. For k >= 0, the following property is verified:
1896 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1899 analyze_siv_subscript_cst_affine (tree chrec_a
,
1901 conflict_function
**overlaps_a
,
1902 conflict_function
**overlaps_b
,
1903 tree
*last_conflicts
)
1905 bool value0
, value1
, value2
;
1906 tree type
, difference
, tmp
;
1908 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1909 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1910 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1911 difference
= chrec_fold_minus (type
, initial_condition (chrec_b
), chrec_a
);
1913 /* Special case overlap in the first iteration. */
1914 if (integer_zerop (difference
))
1916 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1917 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1918 *last_conflicts
= integer_one_node
;
1922 if (!chrec_is_positive (initial_condition (difference
), &value0
))
1924 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1925 fprintf (dump_file
, "siv test failed: chrec is not positive.\n");
1927 dependence_stats
.num_siv_unimplemented
++;
1928 *overlaps_a
= conflict_fn_not_known ();
1929 *overlaps_b
= conflict_fn_not_known ();
1930 *last_conflicts
= chrec_dont_know
;
1935 if (value0
== false)
1937 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value1
))
1939 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1940 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1942 *overlaps_a
= conflict_fn_not_known ();
1943 *overlaps_b
= conflict_fn_not_known ();
1944 *last_conflicts
= chrec_dont_know
;
1945 dependence_stats
.num_siv_unimplemented
++;
1954 chrec_b = {10, +, 1}
1957 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1959 HOST_WIDE_INT numiter
;
1960 struct loop
*loop
= get_chrec_loop (chrec_b
);
1962 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1963 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
,
1964 fold_build1 (ABS_EXPR
, type
, difference
),
1965 CHREC_RIGHT (chrec_b
));
1966 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1967 *last_conflicts
= integer_one_node
;
1970 /* Perform weak-zero siv test to see if overlap is
1971 outside the loop bounds. */
1972 numiter
= max_stmt_executions_int (loop
);
1975 && compare_tree_int (tmp
, numiter
) > 0)
1977 free_conflict_function (*overlaps_a
);
1978 free_conflict_function (*overlaps_b
);
1979 *overlaps_a
= conflict_fn_no_dependence ();
1980 *overlaps_b
= conflict_fn_no_dependence ();
1981 *last_conflicts
= integer_zero_node
;
1982 dependence_stats
.num_siv_independent
++;
1985 dependence_stats
.num_siv_dependent
++;
1989 /* When the step does not divide the difference, there are
1993 *overlaps_a
= conflict_fn_no_dependence ();
1994 *overlaps_b
= conflict_fn_no_dependence ();
1995 *last_conflicts
= integer_zero_node
;
1996 dependence_stats
.num_siv_independent
++;
2005 chrec_b = {10, +, -1}
2007 In this case, chrec_a will not overlap with chrec_b. */
2008 *overlaps_a
= conflict_fn_no_dependence ();
2009 *overlaps_b
= conflict_fn_no_dependence ();
2010 *last_conflicts
= integer_zero_node
;
2011 dependence_stats
.num_siv_independent
++;
2018 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value2
))
2020 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2021 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
2023 *overlaps_a
= conflict_fn_not_known ();
2024 *overlaps_b
= conflict_fn_not_known ();
2025 *last_conflicts
= chrec_dont_know
;
2026 dependence_stats
.num_siv_unimplemented
++;
2031 if (value2
== false)
2035 chrec_b = {10, +, -1}
2037 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
2039 HOST_WIDE_INT numiter
;
2040 struct loop
*loop
= get_chrec_loop (chrec_b
);
2042 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2043 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
, difference
,
2044 CHREC_RIGHT (chrec_b
));
2045 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
2046 *last_conflicts
= integer_one_node
;
2048 /* Perform weak-zero siv test to see if overlap is
2049 outside the loop bounds. */
2050 numiter
= max_stmt_executions_int (loop
);
2053 && compare_tree_int (tmp
, numiter
) > 0)
2055 free_conflict_function (*overlaps_a
);
2056 free_conflict_function (*overlaps_b
);
2057 *overlaps_a
= conflict_fn_no_dependence ();
2058 *overlaps_b
= conflict_fn_no_dependence ();
2059 *last_conflicts
= integer_zero_node
;
2060 dependence_stats
.num_siv_independent
++;
2063 dependence_stats
.num_siv_dependent
++;
2067 /* When the step does not divide the difference, there
2071 *overlaps_a
= conflict_fn_no_dependence ();
2072 *overlaps_b
= conflict_fn_no_dependence ();
2073 *last_conflicts
= integer_zero_node
;
2074 dependence_stats
.num_siv_independent
++;
2084 In this case, chrec_a will not overlap with chrec_b. */
2085 *overlaps_a
= conflict_fn_no_dependence ();
2086 *overlaps_b
= conflict_fn_no_dependence ();
2087 *last_conflicts
= integer_zero_node
;
2088 dependence_stats
.num_siv_independent
++;
2096 /* Helper recursive function for initializing the matrix A. Returns
2097 the initial value of CHREC. */
2100 initialize_matrix_A (lambda_matrix A
, tree chrec
, unsigned index
, int mult
)
2104 switch (TREE_CODE (chrec
))
2106 case POLYNOMIAL_CHREC
:
2107 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec
)) == INTEGER_CST
);
2109 A
[index
][0] = mult
* int_cst_value (CHREC_RIGHT (chrec
));
2110 return initialize_matrix_A (A
, CHREC_LEFT (chrec
), index
+ 1, mult
);
2116 tree op0
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2117 tree op1
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 1), index
, mult
);
2119 return chrec_fold_op (TREE_CODE (chrec
), chrec_type (chrec
), op0
, op1
);
2124 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2125 return chrec_convert (chrec_type (chrec
), op
, NULL
);
2130 /* Handle ~X as -1 - X. */
2131 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2132 return chrec_fold_op (MINUS_EXPR
, chrec_type (chrec
),
2133 build_int_cst (TREE_TYPE (chrec
), -1), op
);
2145 #define FLOOR_DIV(x,y) ((x) / (y))
2147 /* Solves the special case of the Diophantine equation:
2148 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2150 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2151 number of iterations that loops X and Y run. The overlaps will be
2152 constructed as evolutions in dimension DIM. */
2155 compute_overlap_steps_for_affine_univar (int niter
, int step_a
, int step_b
,
2156 affine_fn
*overlaps_a
,
2157 affine_fn
*overlaps_b
,
2158 tree
*last_conflicts
, int dim
)
2160 if (((step_a
> 0 && step_b
> 0)
2161 || (step_a
< 0 && step_b
< 0)))
2163 int step_overlaps_a
, step_overlaps_b
;
2164 int gcd_steps_a_b
, last_conflict
, tau2
;
2166 gcd_steps_a_b
= gcd (step_a
, step_b
);
2167 step_overlaps_a
= step_b
/ gcd_steps_a_b
;
2168 step_overlaps_b
= step_a
/ gcd_steps_a_b
;
2172 tau2
= FLOOR_DIV (niter
, step_overlaps_a
);
2173 tau2
= MIN (tau2
, FLOOR_DIV (niter
, step_overlaps_b
));
2174 last_conflict
= tau2
;
2175 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2178 *last_conflicts
= chrec_dont_know
;
2180 *overlaps_a
= affine_fn_univar (integer_zero_node
, dim
,
2181 build_int_cst (NULL_TREE
,
2183 *overlaps_b
= affine_fn_univar (integer_zero_node
, dim
,
2184 build_int_cst (NULL_TREE
,
2190 *overlaps_a
= affine_fn_cst (integer_zero_node
);
2191 *overlaps_b
= affine_fn_cst (integer_zero_node
);
2192 *last_conflicts
= integer_zero_node
;
2196 /* Solves the special case of a Diophantine equation where CHREC_A is
2197 an affine bivariate function, and CHREC_B is an affine univariate
2198 function. For example,
2200 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2202 has the following overlapping functions:
2204 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2205 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2206 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2208 FORNOW: This is a specialized implementation for a case occurring in
2209 a common benchmark. Implement the general algorithm. */
2212 compute_overlap_steps_for_affine_1_2 (tree chrec_a
, tree chrec_b
,
2213 conflict_function
**overlaps_a
,
2214 conflict_function
**overlaps_b
,
2215 tree
*last_conflicts
)
2217 bool xz_p
, yz_p
, xyz_p
;
2218 int step_x
, step_y
, step_z
;
2219 HOST_WIDE_INT niter_x
, niter_y
, niter_z
, niter
;
2220 affine_fn overlaps_a_xz
, overlaps_b_xz
;
2221 affine_fn overlaps_a_yz
, overlaps_b_yz
;
2222 affine_fn overlaps_a_xyz
, overlaps_b_xyz
;
2223 affine_fn ova1
, ova2
, ovb
;
2224 tree last_conflicts_xz
, last_conflicts_yz
, last_conflicts_xyz
;
2226 step_x
= int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a
)));
2227 step_y
= int_cst_value (CHREC_RIGHT (chrec_a
));
2228 step_z
= int_cst_value (CHREC_RIGHT (chrec_b
));
2230 niter_x
= max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a
)));
2231 niter_y
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
2232 niter_z
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
2234 if (niter_x
< 0 || niter_y
< 0 || niter_z
< 0)
2236 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2237 fprintf (dump_file
, "overlap steps test failed: no iteration counts.\n");
2239 *overlaps_a
= conflict_fn_not_known ();
2240 *overlaps_b
= conflict_fn_not_known ();
2241 *last_conflicts
= chrec_dont_know
;
2245 niter
= MIN (niter_x
, niter_z
);
2246 compute_overlap_steps_for_affine_univar (niter
, step_x
, step_z
,
2249 &last_conflicts_xz
, 1);
2250 niter
= MIN (niter_y
, niter_z
);
2251 compute_overlap_steps_for_affine_univar (niter
, step_y
, step_z
,
2254 &last_conflicts_yz
, 2);
2255 niter
= MIN (niter_x
, niter_z
);
2256 niter
= MIN (niter_y
, niter
);
2257 compute_overlap_steps_for_affine_univar (niter
, step_x
+ step_y
, step_z
,
2260 &last_conflicts_xyz
, 3);
2262 xz_p
= !integer_zerop (last_conflicts_xz
);
2263 yz_p
= !integer_zerop (last_conflicts_yz
);
2264 xyz_p
= !integer_zerop (last_conflicts_xyz
);
2266 if (xz_p
|| yz_p
|| xyz_p
)
2268 ova1
= affine_fn_cst (integer_zero_node
);
2269 ova2
= affine_fn_cst (integer_zero_node
);
2270 ovb
= affine_fn_cst (integer_zero_node
);
2273 affine_fn t0
= ova1
;
2276 ova1
= affine_fn_plus (ova1
, overlaps_a_xz
);
2277 ovb
= affine_fn_plus (ovb
, overlaps_b_xz
);
2278 affine_fn_free (t0
);
2279 affine_fn_free (t2
);
2280 *last_conflicts
= last_conflicts_xz
;
2284 affine_fn t0
= ova2
;
2287 ova2
= affine_fn_plus (ova2
, overlaps_a_yz
);
2288 ovb
= affine_fn_plus (ovb
, overlaps_b_yz
);
2289 affine_fn_free (t0
);
2290 affine_fn_free (t2
);
2291 *last_conflicts
= last_conflicts_yz
;
2295 affine_fn t0
= ova1
;
2296 affine_fn t2
= ova2
;
2299 ova1
= affine_fn_plus (ova1
, overlaps_a_xyz
);
2300 ova2
= affine_fn_plus (ova2
, overlaps_a_xyz
);
2301 ovb
= affine_fn_plus (ovb
, overlaps_b_xyz
);
2302 affine_fn_free (t0
);
2303 affine_fn_free (t2
);
2304 affine_fn_free (t4
);
2305 *last_conflicts
= last_conflicts_xyz
;
2307 *overlaps_a
= conflict_fn (2, ova1
, ova2
);
2308 *overlaps_b
= conflict_fn (1, ovb
);
2312 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2313 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2314 *last_conflicts
= integer_zero_node
;
2317 affine_fn_free (overlaps_a_xz
);
2318 affine_fn_free (overlaps_b_xz
);
2319 affine_fn_free (overlaps_a_yz
);
2320 affine_fn_free (overlaps_b_yz
);
2321 affine_fn_free (overlaps_a_xyz
);
2322 affine_fn_free (overlaps_b_xyz
);
2325 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2328 lambda_vector_copy (lambda_vector vec1
, lambda_vector vec2
,
2331 memcpy (vec2
, vec1
, size
* sizeof (*vec1
));
2334 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2337 lambda_matrix_copy (lambda_matrix mat1
, lambda_matrix mat2
,
2342 for (i
= 0; i
< m
; i
++)
2343 lambda_vector_copy (mat1
[i
], mat2
[i
], n
);
2346 /* Store the N x N identity matrix in MAT. */
2349 lambda_matrix_id (lambda_matrix mat
, int size
)
2353 for (i
= 0; i
< size
; i
++)
2354 for (j
= 0; j
< size
; j
++)
2355 mat
[i
][j
] = (i
== j
) ? 1 : 0;
2358 /* Return the first nonzero element of vector VEC1 between START and N.
2359 We must have START <= N. Returns N if VEC1 is the zero vector. */
2362 lambda_vector_first_nz (lambda_vector vec1
, int n
, int start
)
2365 while (j
< n
&& vec1
[j
] == 0)
2370 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2371 R2 = R2 + CONST1 * R1. */
2374 lambda_matrix_row_add (lambda_matrix mat
, int n
, int r1
, int r2
, int const1
)
2381 for (i
= 0; i
< n
; i
++)
2382 mat
[r2
][i
] += const1
* mat
[r1
][i
];
2385 /* Swap rows R1 and R2 in matrix MAT. */
2388 lambda_matrix_row_exchange (lambda_matrix mat
, int r1
, int r2
)
2397 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2398 and store the result in VEC2. */
2401 lambda_vector_mult_const (lambda_vector vec1
, lambda_vector vec2
,
2402 int size
, int const1
)
2407 lambda_vector_clear (vec2
, size
);
2409 for (i
= 0; i
< size
; i
++)
2410 vec2
[i
] = const1
* vec1
[i
];
2413 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2416 lambda_vector_negate (lambda_vector vec1
, lambda_vector vec2
,
2419 lambda_vector_mult_const (vec1
, vec2
, size
, -1);
2422 /* Negate row R1 of matrix MAT which has N columns. */
2425 lambda_matrix_row_negate (lambda_matrix mat
, int n
, int r1
)
2427 lambda_vector_negate (mat
[r1
], mat
[r1
], n
);
2430 /* Return true if two vectors are equal. */
2433 lambda_vector_equal (lambda_vector vec1
, lambda_vector vec2
, int size
)
2436 for (i
= 0; i
< size
; i
++)
2437 if (vec1
[i
] != vec2
[i
])
2442 /* Given an M x N integer matrix A, this function determines an M x
2443 M unimodular matrix U, and an M x N echelon matrix S such that
2444 "U.A = S". This decomposition is also known as "right Hermite".
2446 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2447 Restructuring Compilers" Utpal Banerjee. */
2450 lambda_matrix_right_hermite (lambda_matrix A
, int m
, int n
,
2451 lambda_matrix S
, lambda_matrix U
)
2455 lambda_matrix_copy (A
, S
, m
, n
);
2456 lambda_matrix_id (U
, m
);
2458 for (j
= 0; j
< n
; j
++)
2460 if (lambda_vector_first_nz (S
[j
], m
, i0
) < m
)
2463 for (i
= m
- 1; i
>= i0
; i
--)
2465 while (S
[i
][j
] != 0)
2467 int sigma
, factor
, a
, b
;
2471 sigma
= (a
* b
< 0) ? -1: 1;
2474 factor
= sigma
* (a
/ b
);
2476 lambda_matrix_row_add (S
, n
, i
, i
-1, -factor
);
2477 lambda_matrix_row_exchange (S
, i
, i
-1);
2479 lambda_matrix_row_add (U
, m
, i
, i
-1, -factor
);
2480 lambda_matrix_row_exchange (U
, i
, i
-1);
2487 /* Determines the overlapping elements due to accesses CHREC_A and
2488 CHREC_B, that are affine functions. This function cannot handle
2489 symbolic evolution functions, ie. when initial conditions are
2490 parameters, because it uses lambda matrices of integers. */
2493 analyze_subscript_affine_affine (tree chrec_a
,
2495 conflict_function
**overlaps_a
,
2496 conflict_function
**overlaps_b
,
2497 tree
*last_conflicts
)
2499 unsigned nb_vars_a
, nb_vars_b
, dim
;
2500 HOST_WIDE_INT init_a
, init_b
, gamma
, gcd_alpha_beta
;
2501 lambda_matrix A
, U
, S
;
2502 struct obstack scratch_obstack
;
2504 if (eq_evolutions_p (chrec_a
, chrec_b
))
2506 /* The accessed index overlaps for each iteration in the
2508 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2509 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2510 *last_conflicts
= chrec_dont_know
;
2513 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2514 fprintf (dump_file
, "(analyze_subscript_affine_affine \n");
2516 /* For determining the initial intersection, we have to solve a
2517 Diophantine equation. This is the most time consuming part.
2519 For answering to the question: "Is there a dependence?" we have
2520 to prove that there exists a solution to the Diophantine
2521 equation, and that the solution is in the iteration domain,
2522 i.e. the solution is positive or zero, and that the solution
2523 happens before the upper bound loop.nb_iterations. Otherwise
2524 there is no dependence. This function outputs a description of
2525 the iterations that hold the intersections. */
2527 nb_vars_a
= nb_vars_in_chrec (chrec_a
);
2528 nb_vars_b
= nb_vars_in_chrec (chrec_b
);
2530 gcc_obstack_init (&scratch_obstack
);
2532 dim
= nb_vars_a
+ nb_vars_b
;
2533 U
= lambda_matrix_new (dim
, dim
, &scratch_obstack
);
2534 A
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2535 S
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2537 init_a
= int_cst_value (initialize_matrix_A (A
, chrec_a
, 0, 1));
2538 init_b
= int_cst_value (initialize_matrix_A (A
, chrec_b
, nb_vars_a
, -1));
2539 gamma
= init_b
- init_a
;
2541 /* Don't do all the hard work of solving the Diophantine equation
2542 when we already know the solution: for example,
2545 | gamma = 3 - 3 = 0.
2546 Then the first overlap occurs during the first iterations:
2547 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2551 if (nb_vars_a
== 1 && nb_vars_b
== 1)
2553 HOST_WIDE_INT step_a
, step_b
;
2554 HOST_WIDE_INT niter
, niter_a
, niter_b
;
2557 niter_a
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
2558 niter_b
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
2559 niter
= MIN (niter_a
, niter_b
);
2560 step_a
= int_cst_value (CHREC_RIGHT (chrec_a
));
2561 step_b
= int_cst_value (CHREC_RIGHT (chrec_b
));
2563 compute_overlap_steps_for_affine_univar (niter
, step_a
, step_b
,
2566 *overlaps_a
= conflict_fn (1, ova
);
2567 *overlaps_b
= conflict_fn (1, ovb
);
2570 else if (nb_vars_a
== 2 && nb_vars_b
== 1)
2571 compute_overlap_steps_for_affine_1_2
2572 (chrec_a
, chrec_b
, overlaps_a
, overlaps_b
, last_conflicts
);
2574 else if (nb_vars_a
== 1 && nb_vars_b
== 2)
2575 compute_overlap_steps_for_affine_1_2
2576 (chrec_b
, chrec_a
, overlaps_b
, overlaps_a
, last_conflicts
);
2580 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2581 fprintf (dump_file
, "affine-affine test failed: too many variables.\n");
2582 *overlaps_a
= conflict_fn_not_known ();
2583 *overlaps_b
= conflict_fn_not_known ();
2584 *last_conflicts
= chrec_dont_know
;
2586 goto end_analyze_subs_aa
;
2590 lambda_matrix_right_hermite (A
, dim
, 1, S
, U
);
2595 lambda_matrix_row_negate (U
, dim
, 0);
2597 gcd_alpha_beta
= S
[0][0];
2599 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2600 but that is a quite strange case. Instead of ICEing, answer
2602 if (gcd_alpha_beta
== 0)
2604 *overlaps_a
= conflict_fn_not_known ();
2605 *overlaps_b
= conflict_fn_not_known ();
2606 *last_conflicts
= chrec_dont_know
;
2607 goto end_analyze_subs_aa
;
2610 /* The classic "gcd-test". */
2611 if (!int_divides_p (gcd_alpha_beta
, gamma
))
2613 /* The "gcd-test" has determined that there is no integer
2614 solution, i.e. there is no dependence. */
2615 *overlaps_a
= conflict_fn_no_dependence ();
2616 *overlaps_b
= conflict_fn_no_dependence ();
2617 *last_conflicts
= integer_zero_node
;
2620 /* Both access functions are univariate. This includes SIV and MIV cases. */
2621 else if (nb_vars_a
== 1 && nb_vars_b
== 1)
2623 /* Both functions should have the same evolution sign. */
2624 if (((A
[0][0] > 0 && -A
[1][0] > 0)
2625 || (A
[0][0] < 0 && -A
[1][0] < 0)))
2627 /* The solutions are given by:
2629 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2632 For a given integer t. Using the following variables,
2634 | i0 = u11 * gamma / gcd_alpha_beta
2635 | j0 = u12 * gamma / gcd_alpha_beta
2642 | y0 = j0 + j1 * t. */
2643 HOST_WIDE_INT i0
, j0
, i1
, j1
;
2645 i0
= U
[0][0] * gamma
/ gcd_alpha_beta
;
2646 j0
= U
[0][1] * gamma
/ gcd_alpha_beta
;
2650 if ((i1
== 0 && i0
< 0)
2651 || (j1
== 0 && j0
< 0))
2653 /* There is no solution.
2654 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2655 falls in here, but for the moment we don't look at the
2656 upper bound of the iteration domain. */
2657 *overlaps_a
= conflict_fn_no_dependence ();
2658 *overlaps_b
= conflict_fn_no_dependence ();
2659 *last_conflicts
= integer_zero_node
;
2660 goto end_analyze_subs_aa
;
2663 if (i1
> 0 && j1
> 0)
2665 HOST_WIDE_INT niter_a
2666 = max_stmt_executions_int (get_chrec_loop (chrec_a
));
2667 HOST_WIDE_INT niter_b
2668 = max_stmt_executions_int (get_chrec_loop (chrec_b
));
2669 HOST_WIDE_INT niter
= MIN (niter_a
, niter_b
);
2671 /* (X0, Y0) is a solution of the Diophantine equation:
2672 "chrec_a (X0) = chrec_b (Y0)". */
2673 HOST_WIDE_INT tau1
= MAX (CEIL (-i0
, i1
),
2675 HOST_WIDE_INT x0
= i1
* tau1
+ i0
;
2676 HOST_WIDE_INT y0
= j1
* tau1
+ j0
;
2678 /* (X1, Y1) is the smallest positive solution of the eq
2679 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2680 first conflict occurs. */
2681 HOST_WIDE_INT min_multiple
= MIN (x0
/ i1
, y0
/ j1
);
2682 HOST_WIDE_INT x1
= x0
- i1
* min_multiple
;
2683 HOST_WIDE_INT y1
= y0
- j1
* min_multiple
;
2687 HOST_WIDE_INT tau2
= MIN (FLOOR_DIV (niter
- i0
, i1
),
2688 FLOOR_DIV (niter
- j0
, j1
));
2689 HOST_WIDE_INT last_conflict
= tau2
- (x1
- i0
)/i1
;
2691 /* If the overlap occurs outside of the bounds of the
2692 loop, there is no dependence. */
2693 if (x1
>= niter
|| y1
>= niter
)
2695 *overlaps_a
= conflict_fn_no_dependence ();
2696 *overlaps_b
= conflict_fn_no_dependence ();
2697 *last_conflicts
= integer_zero_node
;
2698 goto end_analyze_subs_aa
;
2701 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2704 *last_conflicts
= chrec_dont_know
;
2708 affine_fn_univar (build_int_cst (NULL_TREE
, x1
),
2710 build_int_cst (NULL_TREE
, i1
)));
2713 affine_fn_univar (build_int_cst (NULL_TREE
, y1
),
2715 build_int_cst (NULL_TREE
, j1
)));
2719 /* FIXME: For the moment, the upper bound of the
2720 iteration domain for i and j is not checked. */
2721 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2722 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2723 *overlaps_a
= conflict_fn_not_known ();
2724 *overlaps_b
= conflict_fn_not_known ();
2725 *last_conflicts
= chrec_dont_know
;
2730 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2731 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2732 *overlaps_a
= conflict_fn_not_known ();
2733 *overlaps_b
= conflict_fn_not_known ();
2734 *last_conflicts
= chrec_dont_know
;
2739 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2740 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2741 *overlaps_a
= conflict_fn_not_known ();
2742 *overlaps_b
= conflict_fn_not_known ();
2743 *last_conflicts
= chrec_dont_know
;
2746 end_analyze_subs_aa
:
2747 obstack_free (&scratch_obstack
, NULL
);
2748 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2750 fprintf (dump_file
, " (overlaps_a = ");
2751 dump_conflict_function (dump_file
, *overlaps_a
);
2752 fprintf (dump_file
, ")\n (overlaps_b = ");
2753 dump_conflict_function (dump_file
, *overlaps_b
);
2754 fprintf (dump_file
, "))\n");
2758 /* Returns true when analyze_subscript_affine_affine can be used for
2759 determining the dependence relation between chrec_a and chrec_b,
2760 that contain symbols. This function modifies chrec_a and chrec_b
2761 such that the analysis result is the same, and such that they don't
2762 contain symbols, and then can safely be passed to the analyzer.
2764 Example: The analysis of the following tuples of evolutions produce
2765 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2768 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2769 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2773 can_use_analyze_subscript_affine_affine (tree
*chrec_a
, tree
*chrec_b
)
2775 tree diff
, type
, left_a
, left_b
, right_b
;
2777 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a
))
2778 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b
)))
2779 /* FIXME: For the moment not handled. Might be refined later. */
2782 type
= chrec_type (*chrec_a
);
2783 left_a
= CHREC_LEFT (*chrec_a
);
2784 left_b
= chrec_convert (type
, CHREC_LEFT (*chrec_b
), NULL
);
2785 diff
= chrec_fold_minus (type
, left_a
, left_b
);
2787 if (!evolution_function_is_constant_p (diff
))
2790 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2791 fprintf (dump_file
, "can_use_subscript_aff_aff_for_symbolic \n");
2793 *chrec_a
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_a
),
2794 diff
, CHREC_RIGHT (*chrec_a
));
2795 right_b
= chrec_convert (type
, CHREC_RIGHT (*chrec_b
), NULL
);
2796 *chrec_b
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_b
),
2797 build_int_cst (type
, 0),
2802 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2803 *OVERLAPS_B are initialized to the functions that describe the
2804 relation between the elements accessed twice by CHREC_A and
2805 CHREC_B. For k >= 0, the following property is verified:
2807 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2810 analyze_siv_subscript (tree chrec_a
,
2812 conflict_function
**overlaps_a
,
2813 conflict_function
**overlaps_b
,
2814 tree
*last_conflicts
,
2817 dependence_stats
.num_siv
++;
2819 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2820 fprintf (dump_file
, "(analyze_siv_subscript \n");
2822 if (evolution_function_is_constant_p (chrec_a
)
2823 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2824 analyze_siv_subscript_cst_affine (chrec_a
, chrec_b
,
2825 overlaps_a
, overlaps_b
, last_conflicts
);
2827 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2828 && evolution_function_is_constant_p (chrec_b
))
2829 analyze_siv_subscript_cst_affine (chrec_b
, chrec_a
,
2830 overlaps_b
, overlaps_a
, last_conflicts
);
2832 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2833 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2835 if (!chrec_contains_symbols (chrec_a
)
2836 && !chrec_contains_symbols (chrec_b
))
2838 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2839 overlaps_a
, overlaps_b
,
2842 if (CF_NOT_KNOWN_P (*overlaps_a
)
2843 || CF_NOT_KNOWN_P (*overlaps_b
))
2844 dependence_stats
.num_siv_unimplemented
++;
2845 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2846 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2847 dependence_stats
.num_siv_independent
++;
2849 dependence_stats
.num_siv_dependent
++;
2851 else if (can_use_analyze_subscript_affine_affine (&chrec_a
,
2854 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2855 overlaps_a
, overlaps_b
,
2858 if (CF_NOT_KNOWN_P (*overlaps_a
)
2859 || CF_NOT_KNOWN_P (*overlaps_b
))
2860 dependence_stats
.num_siv_unimplemented
++;
2861 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2862 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2863 dependence_stats
.num_siv_independent
++;
2865 dependence_stats
.num_siv_dependent
++;
2868 goto siv_subscript_dontknow
;
2873 siv_subscript_dontknow
:;
2874 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2875 fprintf (dump_file
, " siv test failed: unimplemented");
2876 *overlaps_a
= conflict_fn_not_known ();
2877 *overlaps_b
= conflict_fn_not_known ();
2878 *last_conflicts
= chrec_dont_know
;
2879 dependence_stats
.num_siv_unimplemented
++;
2882 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2883 fprintf (dump_file
, ")\n");
2886 /* Returns false if we can prove that the greatest common divisor of the steps
2887 of CHREC does not divide CST, false otherwise. */
2890 gcd_of_steps_may_divide_p (const_tree chrec
, const_tree cst
)
2892 HOST_WIDE_INT cd
= 0, val
;
2895 if (!tree_fits_shwi_p (cst
))
2897 val
= tree_to_shwi (cst
);
2899 while (TREE_CODE (chrec
) == POLYNOMIAL_CHREC
)
2901 step
= CHREC_RIGHT (chrec
);
2902 if (!tree_fits_shwi_p (step
))
2904 cd
= gcd (cd
, tree_to_shwi (step
));
2905 chrec
= CHREC_LEFT (chrec
);
2908 return val
% cd
== 0;
2911 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2912 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2913 functions that describe the relation between the elements accessed
2914 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2917 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2920 analyze_miv_subscript (tree chrec_a
,
2922 conflict_function
**overlaps_a
,
2923 conflict_function
**overlaps_b
,
2924 tree
*last_conflicts
,
2925 struct loop
*loop_nest
)
2927 tree type
, difference
;
2929 dependence_stats
.num_miv
++;
2930 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2931 fprintf (dump_file
, "(analyze_miv_subscript \n");
2933 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
2934 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
2935 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
2936 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
2938 if (eq_evolutions_p (chrec_a
, chrec_b
))
2940 /* Access functions are the same: all the elements are accessed
2941 in the same order. */
2942 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2943 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2944 *last_conflicts
= max_stmt_executions_tree (get_chrec_loop (chrec_a
));
2945 dependence_stats
.num_miv_dependent
++;
2948 else if (evolution_function_is_constant_p (difference
)
2949 /* For the moment, the following is verified:
2950 evolution_function_is_affine_multivariate_p (chrec_a,
2952 && !gcd_of_steps_may_divide_p (chrec_a
, difference
))
2954 /* testsuite/.../ssa-chrec-33.c
2955 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2957 The difference is 1, and all the evolution steps are multiples
2958 of 2, consequently there are no overlapping elements. */
2959 *overlaps_a
= conflict_fn_no_dependence ();
2960 *overlaps_b
= conflict_fn_no_dependence ();
2961 *last_conflicts
= integer_zero_node
;
2962 dependence_stats
.num_miv_independent
++;
2965 else if (evolution_function_is_affine_multivariate_p (chrec_a
, loop_nest
->num
)
2966 && !chrec_contains_symbols (chrec_a
)
2967 && evolution_function_is_affine_multivariate_p (chrec_b
, loop_nest
->num
)
2968 && !chrec_contains_symbols (chrec_b
))
2970 /* testsuite/.../ssa-chrec-35.c
2971 {0, +, 1}_2 vs. {0, +, 1}_3
2972 the overlapping elements are respectively located at iterations:
2973 {0, +, 1}_x and {0, +, 1}_x,
2974 in other words, we have the equality:
2975 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2978 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2979 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2981 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2982 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2984 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2985 overlaps_a
, overlaps_b
, last_conflicts
);
2987 if (CF_NOT_KNOWN_P (*overlaps_a
)
2988 || CF_NOT_KNOWN_P (*overlaps_b
))
2989 dependence_stats
.num_miv_unimplemented
++;
2990 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2991 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2992 dependence_stats
.num_miv_independent
++;
2994 dependence_stats
.num_miv_dependent
++;
2999 /* When the analysis is too difficult, answer "don't know". */
3000 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3001 fprintf (dump_file
, "analyze_miv_subscript test failed: unimplemented.\n");
3003 *overlaps_a
= conflict_fn_not_known ();
3004 *overlaps_b
= conflict_fn_not_known ();
3005 *last_conflicts
= chrec_dont_know
;
3006 dependence_stats
.num_miv_unimplemented
++;
3009 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3010 fprintf (dump_file
, ")\n");
3013 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
3014 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
3015 OVERLAP_ITERATIONS_B are initialized with two functions that
3016 describe the iterations that contain conflicting elements.
3018 Remark: For an integer k >= 0, the following equality is true:
3020 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3024 analyze_overlapping_iterations (tree chrec_a
,
3026 conflict_function
**overlap_iterations_a
,
3027 conflict_function
**overlap_iterations_b
,
3028 tree
*last_conflicts
, struct loop
*loop_nest
)
3030 unsigned int lnn
= loop_nest
->num
;
3032 dependence_stats
.num_subscript_tests
++;
3034 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3036 fprintf (dump_file
, "(analyze_overlapping_iterations \n");
3037 fprintf (dump_file
, " (chrec_a = ");
3038 print_generic_expr (dump_file
, chrec_a
, 0);
3039 fprintf (dump_file
, ")\n (chrec_b = ");
3040 print_generic_expr (dump_file
, chrec_b
, 0);
3041 fprintf (dump_file
, ")\n");
3044 if (chrec_a
== NULL_TREE
3045 || chrec_b
== NULL_TREE
3046 || chrec_contains_undetermined (chrec_a
)
3047 || chrec_contains_undetermined (chrec_b
))
3049 dependence_stats
.num_subscript_undetermined
++;
3051 *overlap_iterations_a
= conflict_fn_not_known ();
3052 *overlap_iterations_b
= conflict_fn_not_known ();
3055 /* If they are the same chrec, and are affine, they overlap
3056 on every iteration. */
3057 else if (eq_evolutions_p (chrec_a
, chrec_b
)
3058 && (evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
3059 || operand_equal_p (chrec_a
, chrec_b
, 0)))
3061 dependence_stats
.num_same_subscript_function
++;
3062 *overlap_iterations_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3063 *overlap_iterations_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3064 *last_conflicts
= chrec_dont_know
;
3067 /* If they aren't the same, and aren't affine, we can't do anything
3069 else if ((chrec_contains_symbols (chrec_a
)
3070 || chrec_contains_symbols (chrec_b
))
3071 && (!evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
3072 || !evolution_function_is_affine_multivariate_p (chrec_b
, lnn
)))
3074 dependence_stats
.num_subscript_undetermined
++;
3075 *overlap_iterations_a
= conflict_fn_not_known ();
3076 *overlap_iterations_b
= conflict_fn_not_known ();
3079 else if (ziv_subscript_p (chrec_a
, chrec_b
))
3080 analyze_ziv_subscript (chrec_a
, chrec_b
,
3081 overlap_iterations_a
, overlap_iterations_b
,
3084 else if (siv_subscript_p (chrec_a
, chrec_b
))
3085 analyze_siv_subscript (chrec_a
, chrec_b
,
3086 overlap_iterations_a
, overlap_iterations_b
,
3087 last_conflicts
, lnn
);
3090 analyze_miv_subscript (chrec_a
, chrec_b
,
3091 overlap_iterations_a
, overlap_iterations_b
,
3092 last_conflicts
, loop_nest
);
3094 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3096 fprintf (dump_file
, " (overlap_iterations_a = ");
3097 dump_conflict_function (dump_file
, *overlap_iterations_a
);
3098 fprintf (dump_file
, ")\n (overlap_iterations_b = ");
3099 dump_conflict_function (dump_file
, *overlap_iterations_b
);
3100 fprintf (dump_file
, "))\n");
3104 /* Helper function for uniquely inserting distance vectors. */
3107 save_dist_v (struct data_dependence_relation
*ddr
, lambda_vector dist_v
)
3112 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, v
)
3113 if (lambda_vector_equal (v
, dist_v
, DDR_NB_LOOPS (ddr
)))
3116 DDR_DIST_VECTS (ddr
).safe_push (dist_v
);
3119 /* Helper function for uniquely inserting direction vectors. */
3122 save_dir_v (struct data_dependence_relation
*ddr
, lambda_vector dir_v
)
3127 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), i
, v
)
3128 if (lambda_vector_equal (v
, dir_v
, DDR_NB_LOOPS (ddr
)))
3131 DDR_DIR_VECTS (ddr
).safe_push (dir_v
);
3134 /* Add a distance of 1 on all the loops outer than INDEX. If we
3135 haven't yet determined a distance for this outer loop, push a new
3136 distance vector composed of the previous distance, and a distance
3137 of 1 for this outer loop. Example:
3145 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3146 save (0, 1), then we have to save (1, 0). */
3149 add_outer_distances (struct data_dependence_relation
*ddr
,
3150 lambda_vector dist_v
, int index
)
3152 /* For each outer loop where init_v is not set, the accesses are
3153 in dependence of distance 1 in the loop. */
3154 while (--index
>= 0)
3156 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3157 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3159 save_dist_v (ddr
, save_v
);
3163 /* Return false when fail to represent the data dependence as a
3164 distance vector. INIT_B is set to true when a component has been
3165 added to the distance vector DIST_V. INDEX_CARRY is then set to
3166 the index in DIST_V that carries the dependence. */
3169 build_classic_dist_vector_1 (struct data_dependence_relation
*ddr
,
3170 struct data_reference
*ddr_a
,
3171 struct data_reference
*ddr_b
,
3172 lambda_vector dist_v
, bool *init_b
,
3176 lambda_vector init_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3178 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3180 tree access_fn_a
, access_fn_b
;
3181 struct subscript
*subscript
= DDR_SUBSCRIPT (ddr
, i
);
3183 if (chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
3185 non_affine_dependence_relation (ddr
);
3189 access_fn_a
= DR_ACCESS_FN (ddr_a
, i
);
3190 access_fn_b
= DR_ACCESS_FN (ddr_b
, i
);
3192 if (TREE_CODE (access_fn_a
) == POLYNOMIAL_CHREC
3193 && TREE_CODE (access_fn_b
) == POLYNOMIAL_CHREC
)
3196 int var_a
= CHREC_VARIABLE (access_fn_a
);
3197 int var_b
= CHREC_VARIABLE (access_fn_b
);
3200 || chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
3202 non_affine_dependence_relation (ddr
);
3206 dist
= int_cst_value (SUB_DISTANCE (subscript
));
3207 index
= index_in_loop_nest (var_a
, DDR_LOOP_NEST (ddr
));
3208 *index_carry
= MIN (index
, *index_carry
);
3210 /* This is the subscript coupling test. If we have already
3211 recorded a distance for this loop (a distance coming from
3212 another subscript), it should be the same. For example,
3213 in the following code, there is no dependence:
3220 if (init_v
[index
] != 0 && dist_v
[index
] != dist
)
3222 finalize_ddr_dependent (ddr
, chrec_known
);
3226 dist_v
[index
] = dist
;
3230 else if (!operand_equal_p (access_fn_a
, access_fn_b
, 0))
3232 /* This can be for example an affine vs. constant dependence
3233 (T[i] vs. T[3]) that is not an affine dependence and is
3234 not representable as a distance vector. */
3235 non_affine_dependence_relation (ddr
);
3243 /* Return true when the DDR contains only constant access functions. */
3246 constant_access_functions (const struct data_dependence_relation
*ddr
)
3250 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3251 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr
), i
))
3252 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr
), i
)))
3258 /* Helper function for the case where DDR_A and DDR_B are the same
3259 multivariate access function with a constant step. For an example
3263 add_multivariate_self_dist (struct data_dependence_relation
*ddr
, tree c_2
)
3266 tree c_1
= CHREC_LEFT (c_2
);
3267 tree c_0
= CHREC_LEFT (c_1
);
3268 lambda_vector dist_v
;
3271 /* Polynomials with more than 2 variables are not handled yet. When
3272 the evolution steps are parameters, it is not possible to
3273 represent the dependence using classical distance vectors. */
3274 if (TREE_CODE (c_0
) != INTEGER_CST
3275 || TREE_CODE (CHREC_RIGHT (c_1
)) != INTEGER_CST
3276 || TREE_CODE (CHREC_RIGHT (c_2
)) != INTEGER_CST
)
3278 DDR_AFFINE_P (ddr
) = false;
3282 x_2
= index_in_loop_nest (CHREC_VARIABLE (c_2
), DDR_LOOP_NEST (ddr
));
3283 x_1
= index_in_loop_nest (CHREC_VARIABLE (c_1
), DDR_LOOP_NEST (ddr
));
3285 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3286 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3287 v1
= int_cst_value (CHREC_RIGHT (c_1
));
3288 v2
= int_cst_value (CHREC_RIGHT (c_2
));
3301 save_dist_v (ddr
, dist_v
);
3303 add_outer_distances (ddr
, dist_v
, x_1
);
3306 /* Helper function for the case where DDR_A and DDR_B are the same
3307 access functions. */
3310 add_other_self_distances (struct data_dependence_relation
*ddr
)
3312 lambda_vector dist_v
;
3314 int index_carry
= DDR_NB_LOOPS (ddr
);
3316 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3318 tree access_fun
= DR_ACCESS_FN (DDR_A (ddr
), i
);
3320 if (TREE_CODE (access_fun
) == POLYNOMIAL_CHREC
)
3322 if (!evolution_function_is_univariate_p (access_fun
))
3324 if (DDR_NUM_SUBSCRIPTS (ddr
) != 1)
3326 DDR_ARE_DEPENDENT (ddr
) = chrec_dont_know
;
3330 access_fun
= DR_ACCESS_FN (DDR_A (ddr
), 0);
3332 if (TREE_CODE (CHREC_LEFT (access_fun
)) == POLYNOMIAL_CHREC
)
3333 add_multivariate_self_dist (ddr
, access_fun
);
3335 /* The evolution step is not constant: it varies in
3336 the outer loop, so this cannot be represented by a
3337 distance vector. For example in pr34635.c the
3338 evolution is {0, +, {0, +, 4}_1}_2. */
3339 DDR_AFFINE_P (ddr
) = false;
3344 index_carry
= MIN (index_carry
,
3345 index_in_loop_nest (CHREC_VARIABLE (access_fun
),
3346 DDR_LOOP_NEST (ddr
)));
3350 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3351 add_outer_distances (ddr
, dist_v
, index_carry
);
3355 insert_innermost_unit_dist_vector (struct data_dependence_relation
*ddr
)
3357 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3359 dist_v
[DDR_INNER_LOOP (ddr
)] = 1;
3360 save_dist_v (ddr
, dist_v
);
3363 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3364 is the case for example when access functions are the same and
3365 equal to a constant, as in:
3372 in which case the distance vectors are (0) and (1). */
3375 add_distance_for_zero_overlaps (struct data_dependence_relation
*ddr
)
3379 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3381 subscript_p sub
= DDR_SUBSCRIPT (ddr
, i
);
3382 conflict_function
*ca
= SUB_CONFLICTS_IN_A (sub
);
3383 conflict_function
*cb
= SUB_CONFLICTS_IN_B (sub
);
3385 for (j
= 0; j
< ca
->n
; j
++)
3386 if (affine_function_zero_p (ca
->fns
[j
]))
3388 insert_innermost_unit_dist_vector (ddr
);
3392 for (j
= 0; j
< cb
->n
; j
++)
3393 if (affine_function_zero_p (cb
->fns
[j
]))
3395 insert_innermost_unit_dist_vector (ddr
);
3401 /* Compute the classic per loop distance vector. DDR is the data
3402 dependence relation to build a vector from. Return false when fail
3403 to represent the data dependence as a distance vector. */
3406 build_classic_dist_vector (struct data_dependence_relation
*ddr
,
3407 struct loop
*loop_nest
)
3409 bool init_b
= false;
3410 int index_carry
= DDR_NB_LOOPS (ddr
);
3411 lambda_vector dist_v
;
3413 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
3416 if (same_access_functions (ddr
))
3418 /* Save the 0 vector. */
3419 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3420 save_dist_v (ddr
, dist_v
);
3422 if (constant_access_functions (ddr
))
3423 add_distance_for_zero_overlaps (ddr
);
3425 if (DDR_NB_LOOPS (ddr
) > 1)
3426 add_other_self_distances (ddr
);
3431 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3432 if (!build_classic_dist_vector_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
),
3433 dist_v
, &init_b
, &index_carry
))
3436 /* Save the distance vector if we initialized one. */
3439 /* Verify a basic constraint: classic distance vectors should
3440 always be lexicographically positive.
3442 Data references are collected in the order of execution of
3443 the program, thus for the following loop
3445 | for (i = 1; i < 100; i++)
3446 | for (j = 1; j < 100; j++)
3448 | t = T[j+1][i-1]; // A
3449 | T[j][i] = t + 2; // B
3452 references are collected following the direction of the wind:
3453 A then B. The data dependence tests are performed also
3454 following this order, such that we're looking at the distance
3455 separating the elements accessed by A from the elements later
3456 accessed by B. But in this example, the distance returned by
3457 test_dep (A, B) is lexicographically negative (-1, 1), that
3458 means that the access A occurs later than B with respect to
3459 the outer loop, ie. we're actually looking upwind. In this
3460 case we solve test_dep (B, A) looking downwind to the
3461 lexicographically positive solution, that returns the
3462 distance vector (1, -1). */
3463 if (!lambda_vector_lexico_pos (dist_v
, DDR_NB_LOOPS (ddr
)))
3465 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3466 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3469 compute_subscript_distance (ddr
);
3470 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3471 save_v
, &init_b
, &index_carry
))
3473 save_dist_v (ddr
, save_v
);
3474 DDR_REVERSED_P (ddr
) = true;
3476 /* In this case there is a dependence forward for all the
3479 | for (k = 1; k < 100; k++)
3480 | for (i = 1; i < 100; i++)
3481 | for (j = 1; j < 100; j++)
3483 | t = T[j+1][i-1]; // A
3484 | T[j][i] = t + 2; // B
3492 if (DDR_NB_LOOPS (ddr
) > 1)
3494 add_outer_distances (ddr
, save_v
, index_carry
);
3495 add_outer_distances (ddr
, dist_v
, index_carry
);
3500 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3501 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3503 if (DDR_NB_LOOPS (ddr
) > 1)
3505 lambda_vector opposite_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3507 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
),
3508 DDR_A (ddr
), loop_nest
))
3510 compute_subscript_distance (ddr
);
3511 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3512 opposite_v
, &init_b
,
3516 save_dist_v (ddr
, save_v
);
3517 add_outer_distances (ddr
, dist_v
, index_carry
);
3518 add_outer_distances (ddr
, opposite_v
, index_carry
);
3521 save_dist_v (ddr
, save_v
);
3526 /* There is a distance of 1 on all the outer loops: Example:
3527 there is a dependence of distance 1 on loop_1 for the array A.
3533 add_outer_distances (ddr
, dist_v
,
3534 lambda_vector_first_nz (dist_v
,
3535 DDR_NB_LOOPS (ddr
), 0));
3538 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3542 fprintf (dump_file
, "(build_classic_dist_vector\n");
3543 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3545 fprintf (dump_file
, " dist_vector = (");
3546 print_lambda_vector (dump_file
, DDR_DIST_VECT (ddr
, i
),
3547 DDR_NB_LOOPS (ddr
));
3548 fprintf (dump_file
, " )\n");
3550 fprintf (dump_file
, ")\n");
3556 /* Return the direction for a given distance.
3557 FIXME: Computing dir this way is suboptimal, since dir can catch
3558 cases that dist is unable to represent. */
3560 static inline enum data_dependence_direction
3561 dir_from_dist (int dist
)
3564 return dir_positive
;
3566 return dir_negative
;
3571 /* Compute the classic per loop direction vector. DDR is the data
3572 dependence relation to build a vector from. */
3575 build_classic_dir_vector (struct data_dependence_relation
*ddr
)
3578 lambda_vector dist_v
;
3580 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
3582 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3584 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3585 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3587 save_dir_v (ddr
, dir_v
);
3591 /* Helper function. Returns true when there is a dependence between
3592 data references DRA and DRB. */
3595 subscript_dependence_tester_1 (struct data_dependence_relation
*ddr
,
3596 struct data_reference
*dra
,
3597 struct data_reference
*drb
,
3598 struct loop
*loop_nest
)
3601 tree last_conflicts
;
3602 struct subscript
*subscript
;
3603 tree res
= NULL_TREE
;
3605 for (i
= 0; DDR_SUBSCRIPTS (ddr
).iterate (i
, &subscript
); i
++)
3607 conflict_function
*overlaps_a
, *overlaps_b
;
3609 analyze_overlapping_iterations (DR_ACCESS_FN (dra
, i
),
3610 DR_ACCESS_FN (drb
, i
),
3611 &overlaps_a
, &overlaps_b
,
3612 &last_conflicts
, loop_nest
);
3614 if (SUB_CONFLICTS_IN_A (subscript
))
3615 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
3616 if (SUB_CONFLICTS_IN_B (subscript
))
3617 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
3619 SUB_CONFLICTS_IN_A (subscript
) = overlaps_a
;
3620 SUB_CONFLICTS_IN_B (subscript
) = overlaps_b
;
3621 SUB_LAST_CONFLICT (subscript
) = last_conflicts
;
3623 /* If there is any undetermined conflict function we have to
3624 give a conservative answer in case we cannot prove that
3625 no dependence exists when analyzing another subscript. */
3626 if (CF_NOT_KNOWN_P (overlaps_a
)
3627 || CF_NOT_KNOWN_P (overlaps_b
))
3629 res
= chrec_dont_know
;
3633 /* When there is a subscript with no dependence we can stop. */
3634 else if (CF_NO_DEPENDENCE_P (overlaps_a
)
3635 || CF_NO_DEPENDENCE_P (overlaps_b
))
3642 if (res
== NULL_TREE
)
3645 if (res
== chrec_known
)
3646 dependence_stats
.num_dependence_independent
++;
3648 dependence_stats
.num_dependence_undetermined
++;
3649 finalize_ddr_dependent (ddr
, res
);
3653 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3656 subscript_dependence_tester (struct data_dependence_relation
*ddr
,
3657 struct loop
*loop_nest
)
3659 if (subscript_dependence_tester_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
), loop_nest
))
3660 dependence_stats
.num_dependence_dependent
++;
3662 compute_subscript_distance (ddr
);
3663 if (build_classic_dist_vector (ddr
, loop_nest
))
3664 build_classic_dir_vector (ddr
);
3667 /* Returns true when all the access functions of A are affine or
3668 constant with respect to LOOP_NEST. */
3671 access_functions_are_affine_or_constant_p (const struct data_reference
*a
,
3672 const struct loop
*loop_nest
)
3675 vec
<tree
> fns
= DR_ACCESS_FNS (a
);
3678 FOR_EACH_VEC_ELT (fns
, i
, t
)
3679 if (!evolution_function_is_invariant_p (t
, loop_nest
->num
)
3680 && !evolution_function_is_affine_multivariate_p (t
, loop_nest
->num
))
3686 /* Initializes an equation for an OMEGA problem using the information
3687 contained in the ACCESS_FUN. Returns true when the operation
3690 PB is the omega constraint system.
3691 EQ is the number of the equation to be initialized.
3692 OFFSET is used for shifting the variables names in the constraints:
3693 a constrain is composed of 2 * the number of variables surrounding
3694 dependence accesses. OFFSET is set either to 0 for the first n variables,
3695 then it is set to n.
3696 ACCESS_FUN is expected to be an affine chrec. */
3699 init_omega_eq_with_af (omega_pb pb
, unsigned eq
,
3700 unsigned int offset
, tree access_fun
,
3701 struct data_dependence_relation
*ddr
)
3703 switch (TREE_CODE (access_fun
))
3705 case POLYNOMIAL_CHREC
:
3707 tree left
= CHREC_LEFT (access_fun
);
3708 tree right
= CHREC_RIGHT (access_fun
);
3709 int var
= CHREC_VARIABLE (access_fun
);
3712 if (TREE_CODE (right
) != INTEGER_CST
)
3715 var_idx
= index_in_loop_nest (var
, DDR_LOOP_NEST (ddr
));
3716 pb
->eqs
[eq
].coef
[offset
+ var_idx
+ 1] = int_cst_value (right
);
3718 /* Compute the innermost loop index. */
3719 DDR_INNER_LOOP (ddr
) = MAX (DDR_INNER_LOOP (ddr
), var_idx
);
3722 pb
->eqs
[eq
].coef
[var_idx
+ DDR_NB_LOOPS (ddr
) + 1]
3723 += int_cst_value (right
);
3725 switch (TREE_CODE (left
))
3727 case POLYNOMIAL_CHREC
:
3728 return init_omega_eq_with_af (pb
, eq
, offset
, left
, ddr
);
3731 pb
->eqs
[eq
].coef
[0] += int_cst_value (left
);
3740 pb
->eqs
[eq
].coef
[0] += int_cst_value (access_fun
);
3748 /* As explained in the comments preceding init_omega_for_ddr, we have
3749 to set up a system for each loop level, setting outer loops
3750 variation to zero, and current loop variation to positive or zero.
3751 Save each lexico positive distance vector. */
3754 omega_extract_distance_vectors (omega_pb pb
,
3755 struct data_dependence_relation
*ddr
)
3759 struct loop
*loopi
, *loopj
;
3760 enum omega_result res
;
3762 /* Set a new problem for each loop in the nest. The basis is the
3763 problem that we have initialized until now. On top of this we
3764 add new constraints. */
3765 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3766 && DDR_LOOP_NEST (ddr
).iterate (i
, &loopi
); i
++)
3769 omega_pb copy
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
),
3770 DDR_NB_LOOPS (ddr
));
3772 omega_copy_problem (copy
, pb
);
3774 /* For all the outer loops "loop_j", add "dj = 0". */
3775 for (j
= 0; j
< i
&& DDR_LOOP_NEST (ddr
).iterate (j
, &loopj
); j
++)
3777 eq
= omega_add_zero_eq (copy
, omega_black
);
3778 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3781 /* For "loop_i", add "0 <= di". */
3782 geq
= omega_add_zero_geq (copy
, omega_black
);
3783 copy
->geqs
[geq
].coef
[i
+ 1] = 1;
3785 /* Reduce the constraint system, and test that the current
3786 problem is feasible. */
3787 res
= omega_simplify_problem (copy
);
3788 if (res
== omega_false
3789 || res
== omega_unknown
3790 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3793 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3794 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3796 dist
= copy
->subs
[eq
].coef
[0];
3802 /* Reinitialize problem... */
3803 omega_copy_problem (copy
, pb
);
3804 for (j
= 0; j
< i
&& DDR_LOOP_NEST (ddr
).iterate (j
, &loopj
); j
++)
3806 eq
= omega_add_zero_eq (copy
, omega_black
);
3807 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3810 /* ..., but this time "di = 1". */
3811 eq
= omega_add_zero_eq (copy
, omega_black
);
3812 copy
->eqs
[eq
].coef
[i
+ 1] = 1;
3813 copy
->eqs
[eq
].coef
[0] = -1;
3815 res
= omega_simplify_problem (copy
);
3816 if (res
== omega_false
3817 || res
== omega_unknown
3818 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3821 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3822 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3824 dist
= copy
->subs
[eq
].coef
[0];
3830 /* Save the lexicographically positive distance vector. */
3833 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3834 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3838 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3839 if (copy
->subs
[eq
].key
> 0)
3841 dist
= copy
->subs
[eq
].coef
[0];
3842 dist_v
[copy
->subs
[eq
].key
- 1] = dist
;
3845 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3846 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3848 save_dist_v (ddr
, dist_v
);
3849 save_dir_v (ddr
, dir_v
);
3853 omega_free_problem (copy
);
3857 /* This is called for each subscript of a tuple of data references:
3858 insert an equality for representing the conflicts. */
3861 omega_setup_subscript (tree access_fun_a
, tree access_fun_b
,
3862 struct data_dependence_relation
*ddr
,
3863 omega_pb pb
, bool *maybe_dependent
)
3866 tree type
= signed_type_for_types (TREE_TYPE (access_fun_a
),
3867 TREE_TYPE (access_fun_b
));
3868 tree fun_a
= chrec_convert (type
, access_fun_a
, NULL
);
3869 tree fun_b
= chrec_convert (type
, access_fun_b
, NULL
);
3870 tree difference
= chrec_fold_minus (type
, fun_a
, fun_b
);
3873 /* When the fun_a - fun_b is not constant, the dependence is not
3874 captured by the classic distance vector representation. */
3875 if (TREE_CODE (difference
) != INTEGER_CST
)
3879 if (ziv_subscript_p (fun_a
, fun_b
) && !integer_zerop (difference
))
3881 /* There is no dependence. */
3882 *maybe_dependent
= false;
3886 minus_one
= build_int_cst (type
, -1);
3887 fun_b
= chrec_fold_multiply (type
, fun_b
, minus_one
);
3889 eq
= omega_add_zero_eq (pb
, omega_black
);
3890 if (!init_omega_eq_with_af (pb
, eq
, DDR_NB_LOOPS (ddr
), fun_a
, ddr
)
3891 || !init_omega_eq_with_af (pb
, eq
, 0, fun_b
, ddr
))
3892 /* There is probably a dependence, but the system of
3893 constraints cannot be built: answer "don't know". */
3897 if (DDR_NB_LOOPS (ddr
) != 0 && pb
->eqs
[eq
].coef
[0]
3898 && !int_divides_p (lambda_vector_gcd
3899 ((lambda_vector
) &(pb
->eqs
[eq
].coef
[1]),
3900 2 * DDR_NB_LOOPS (ddr
)),
3901 pb
->eqs
[eq
].coef
[0]))
3903 /* There is no dependence. */
3904 *maybe_dependent
= false;
3911 /* Helper function, same as init_omega_for_ddr but specialized for
3912 data references A and B. */
3915 init_omega_for_ddr_1 (struct data_reference
*dra
, struct data_reference
*drb
,
3916 struct data_dependence_relation
*ddr
,
3917 omega_pb pb
, bool *maybe_dependent
)
3922 unsigned nb_loops
= DDR_NB_LOOPS (ddr
);
3924 /* Insert an equality per subscript. */
3925 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3927 if (!omega_setup_subscript (DR_ACCESS_FN (dra
, i
), DR_ACCESS_FN (drb
, i
),
3928 ddr
, pb
, maybe_dependent
))
3930 else if (*maybe_dependent
== false)
3932 /* There is no dependence. */
3933 DDR_ARE_DEPENDENT (ddr
) = chrec_known
;
3938 /* Insert inequalities: constraints corresponding to the iteration
3939 domain, i.e. the loops surrounding the references "loop_x" and
3940 the distance variables "dx". The layout of the OMEGA
3941 representation is as follows:
3942 - coef[0] is the constant
3943 - coef[1..nb_loops] are the protected variables that will not be
3944 removed by the solver: the "dx"
3945 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3947 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3948 && DDR_LOOP_NEST (ddr
).iterate (i
, &loopi
); i
++)
3950 HOST_WIDE_INT nbi
= max_stmt_executions_int (loopi
);
3953 ineq
= omega_add_zero_geq (pb
, omega_black
);
3954 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3956 /* 0 <= loop_x + dx */
3957 ineq
= omega_add_zero_geq (pb
, omega_black
);
3958 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3959 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3963 /* loop_x <= nb_iters */
3964 ineq
= omega_add_zero_geq (pb
, omega_black
);
3965 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3966 pb
->geqs
[ineq
].coef
[0] = nbi
;
3968 /* loop_x + dx <= nb_iters */
3969 ineq
= omega_add_zero_geq (pb
, omega_black
);
3970 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3971 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3972 pb
->geqs
[ineq
].coef
[0] = nbi
;
3974 /* A step "dx" bigger than nb_iters is not feasible, so
3975 add "0 <= nb_iters + dx", */
3976 ineq
= omega_add_zero_geq (pb
, omega_black
);
3977 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3978 pb
->geqs
[ineq
].coef
[0] = nbi
;
3979 /* and "dx <= nb_iters". */
3980 ineq
= omega_add_zero_geq (pb
, omega_black
);
3981 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3982 pb
->geqs
[ineq
].coef
[0] = nbi
;
3986 omega_extract_distance_vectors (pb
, ddr
);
3991 /* Sets up the Omega dependence problem for the data dependence
3992 relation DDR. Returns false when the constraint system cannot be
3993 built, ie. when the test answers "don't know". Returns true
3994 otherwise, and when independence has been proved (using one of the
3995 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3996 set MAYBE_DEPENDENT to true.
3998 Example: for setting up the dependence system corresponding to the
3999 conflicting accesses
4004 | ... A[2*j, 2*(i + j)]
4008 the following constraints come from the iteration domain:
4015 where di, dj are the distance variables. The constraints
4016 representing the conflicting elements are:
4019 i + 1 = 2 * (i + di + j + dj)
4021 For asking that the resulting distance vector (di, dj) be
4022 lexicographically positive, we insert the constraint "di >= 0". If
4023 "di = 0" in the solution, we fix that component to zero, and we
4024 look at the inner loops: we set a new problem where all the outer
4025 loop distances are zero, and fix this inner component to be
4026 positive. When one of the components is positive, we save that
4027 distance, and set a new problem where the distance on this loop is
4028 zero, searching for other distances in the inner loops. Here is
4029 the classic example that illustrates that we have to set for each
4030 inner loop a new problem:
4038 we have to save two distances (1, 0) and (0, 1).
4040 Given two array references, refA and refB, we have to set the
4041 dependence problem twice, refA vs. refB and refB vs. refA, and we
4042 cannot do a single test, as refB might occur before refA in the
4043 inner loops, and the contrary when considering outer loops: ex.
4048 | T[{1,+,1}_2][{1,+,1}_1] // refA
4049 | T[{2,+,1}_2][{0,+,1}_1] // refB
4054 refB touches the elements in T before refA, and thus for the same
4055 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
4056 but for successive loop_0 iterations, we have (1, -1, 1)
4058 The Omega solver expects the distance variables ("di" in the
4059 previous example) to come first in the constraint system (as
4060 variables to be protected, or "safe" variables), the constraint
4061 system is built using the following layout:
4063 "cst | distance vars | index vars".
4067 init_omega_for_ddr (struct data_dependence_relation
*ddr
,
4068 bool *maybe_dependent
)
4073 *maybe_dependent
= true;
4075 if (same_access_functions (ddr
))
4078 lambda_vector dir_v
;
4080 /* Save the 0 vector. */
4081 save_dist_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
4082 dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4083 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
4084 dir_v
[j
] = dir_equal
;
4085 save_dir_v (ddr
, dir_v
);
4087 /* Save the dependences carried by outer loops. */
4088 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
4089 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
4091 omega_free_problem (pb
);
4095 /* Omega expects the protected variables (those that have to be kept
4096 after elimination) to appear first in the constraint system.
4097 These variables are the distance variables. In the following
4098 initialization we declare NB_LOOPS safe variables, and the total
4099 number of variables for the constraint system is 2*NB_LOOPS. */
4100 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
4101 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
4103 omega_free_problem (pb
);
4105 /* Stop computation if not decidable, or no dependence. */
4106 if (res
== false || *maybe_dependent
== false)
4109 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
4110 res
= init_omega_for_ddr_1 (DDR_B (ddr
), DDR_A (ddr
), ddr
, pb
,
4112 omega_free_problem (pb
);
4117 /* Return true when DDR contains the same information as that stored
4118 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4121 ddr_consistent_p (FILE *file
,
4122 struct data_dependence_relation
*ddr
,
4123 vec
<lambda_vector
> dist_vects
,
4124 vec
<lambda_vector
> dir_vects
)
4128 /* If dump_file is set, output there. */
4129 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4132 if (dist_vects
.length () != DDR_NUM_DIST_VECTS (ddr
))
4134 lambda_vector b_dist_v
;
4135 fprintf (file
, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4136 dist_vects
.length (),
4137 DDR_NUM_DIST_VECTS (ddr
));
4139 fprintf (file
, "Banerjee dist vectors:\n");
4140 FOR_EACH_VEC_ELT (dist_vects
, i
, b_dist_v
)
4141 print_lambda_vector (file
, b_dist_v
, DDR_NB_LOOPS (ddr
));
4143 fprintf (file
, "Omega dist vectors:\n");
4144 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
4145 print_lambda_vector (file
, DDR_DIST_VECT (ddr
, i
), DDR_NB_LOOPS (ddr
));
4147 fprintf (file
, "data dependence relation:\n");
4148 dump_data_dependence_relation (file
, ddr
);
4150 fprintf (file
, ")\n");
4154 if (dir_vects
.length () != DDR_NUM_DIR_VECTS (ddr
))
4156 fprintf (file
, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4157 dir_vects
.length (),
4158 DDR_NUM_DIR_VECTS (ddr
));
4162 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
4164 lambda_vector a_dist_v
;
4165 lambda_vector b_dist_v
= DDR_DIST_VECT (ddr
, i
);
4167 /* Distance vectors are not ordered in the same way in the DDR
4168 and in the DIST_VECTS: search for a matching vector. */
4169 FOR_EACH_VEC_ELT (dist_vects
, j
, a_dist_v
)
4170 if (lambda_vector_equal (a_dist_v
, b_dist_v
, DDR_NB_LOOPS (ddr
)))
4173 if (j
== dist_vects
.length ())
4175 fprintf (file
, "\n(Dist vectors from the first dependence analyzer:\n");
4176 print_dist_vectors (file
, dist_vects
, DDR_NB_LOOPS (ddr
));
4177 fprintf (file
, "not found in Omega dist vectors:\n");
4178 print_dist_vectors (file
, DDR_DIST_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
4179 fprintf (file
, "data dependence relation:\n");
4180 dump_data_dependence_relation (file
, ddr
);
4181 fprintf (file
, ")\n");
4185 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
4187 lambda_vector a_dir_v
;
4188 lambda_vector b_dir_v
= DDR_DIR_VECT (ddr
, i
);
4190 /* Direction vectors are not ordered in the same way in the DDR
4191 and in the DIR_VECTS: search for a matching vector. */
4192 FOR_EACH_VEC_ELT (dir_vects
, j
, a_dir_v
)
4193 if (lambda_vector_equal (a_dir_v
, b_dir_v
, DDR_NB_LOOPS (ddr
)))
4196 if (j
== dist_vects
.length ())
4198 fprintf (file
, "\n(Dir vectors from the first dependence analyzer:\n");
4199 print_dir_vectors (file
, dir_vects
, DDR_NB_LOOPS (ddr
));
4200 fprintf (file
, "not found in Omega dir vectors:\n");
4201 print_dir_vectors (file
, DDR_DIR_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
4202 fprintf (file
, "data dependence relation:\n");
4203 dump_data_dependence_relation (file
, ddr
);
4204 fprintf (file
, ")\n");
4211 /* This computes the affine dependence relation between A and B with
4212 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4213 independence between two accesses, while CHREC_DONT_KNOW is used
4214 for representing the unknown relation.
4216 Note that it is possible to stop the computation of the dependence
4217 relation the first time we detect a CHREC_KNOWN element for a given
4221 compute_affine_dependence (struct data_dependence_relation
*ddr
,
4222 struct loop
*loop_nest
)
4224 struct data_reference
*dra
= DDR_A (ddr
);
4225 struct data_reference
*drb
= DDR_B (ddr
);
4227 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4229 fprintf (dump_file
, "(compute_affine_dependence\n");
4230 fprintf (dump_file
, " stmt_a: ");
4231 print_gimple_stmt (dump_file
, DR_STMT (dra
), 0, TDF_SLIM
);
4232 fprintf (dump_file
, " stmt_b: ");
4233 print_gimple_stmt (dump_file
, DR_STMT (drb
), 0, TDF_SLIM
);
4236 /* Analyze only when the dependence relation is not yet known. */
4237 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4239 dependence_stats
.num_dependence_tests
++;
4241 if (access_functions_are_affine_or_constant_p (dra
, loop_nest
)
4242 && access_functions_are_affine_or_constant_p (drb
, loop_nest
))
4244 subscript_dependence_tester (ddr
, loop_nest
);
4246 if (flag_check_data_deps
)
4248 /* Dump the dependences from the first algorithm. */
4249 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4251 fprintf (dump_file
, "\n\nBanerjee Analyzer\n");
4252 dump_data_dependence_relation (dump_file
, ddr
);
4255 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4257 bool maybe_dependent
;
4258 vec
<lambda_vector
> dir_vects
, dist_vects
;
4260 /* Save the result of the first DD analyzer. */
4261 dist_vects
= DDR_DIST_VECTS (ddr
);
4262 dir_vects
= DDR_DIR_VECTS (ddr
);
4264 /* Reset the information. */
4265 DDR_DIST_VECTS (ddr
).create (0);
4266 DDR_DIR_VECTS (ddr
).create (0);
4268 /* Compute the same information using Omega. */
4269 if (!init_omega_for_ddr (ddr
, &maybe_dependent
))
4270 goto csys_dont_know
;
4272 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4274 fprintf (dump_file
, "Omega Analyzer\n");
4275 dump_data_dependence_relation (dump_file
, ddr
);
4278 /* Check that we get the same information. */
4279 if (maybe_dependent
)
4280 gcc_assert (ddr_consistent_p (stderr
, ddr
, dist_vects
,
4286 /* As a last case, if the dependence cannot be determined, or if
4287 the dependence is considered too difficult to determine, answer
4292 dependence_stats
.num_dependence_undetermined
++;
4294 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4296 fprintf (dump_file
, "Data ref a:\n");
4297 dump_data_reference (dump_file
, dra
);
4298 fprintf (dump_file
, "Data ref b:\n");
4299 dump_data_reference (dump_file
, drb
);
4300 fprintf (dump_file
, "affine dependence test not usable: access function not affine or constant.\n");
4302 finalize_ddr_dependent (ddr
, chrec_dont_know
);
4306 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4308 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
4309 fprintf (dump_file
, ") -> no dependence\n");
4310 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
4311 fprintf (dump_file
, ") -> dependence analysis failed\n");
4313 fprintf (dump_file
, ")\n");
4317 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4318 the data references in DATAREFS, in the LOOP_NEST. When
4319 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4320 relations. Return true when successful, i.e. data references number
4321 is small enough to be handled. */
4324 compute_all_dependences (vec
<data_reference_p
> datarefs
,
4325 vec
<ddr_p
> *dependence_relations
,
4326 vec
<loop_p
> loop_nest
,
4327 bool compute_self_and_rr
)
4329 struct data_dependence_relation
*ddr
;
4330 struct data_reference
*a
, *b
;
4333 if ((int) datarefs
.length ()
4334 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS
))
4336 struct data_dependence_relation
*ddr
;
4338 /* Insert a single relation into dependence_relations:
4340 ddr
= initialize_data_dependence_relation (NULL
, NULL
, loop_nest
);
4341 dependence_relations
->safe_push (ddr
);
4345 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
4346 for (j
= i
+ 1; datarefs
.iterate (j
, &b
); j
++)
4347 if (DR_IS_WRITE (a
) || DR_IS_WRITE (b
) || compute_self_and_rr
)
4349 ddr
= initialize_data_dependence_relation (a
, b
, loop_nest
);
4350 dependence_relations
->safe_push (ddr
);
4351 if (loop_nest
.exists ())
4352 compute_affine_dependence (ddr
, loop_nest
[0]);
4355 if (compute_self_and_rr
)
4356 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
4358 ddr
= initialize_data_dependence_relation (a
, a
, loop_nest
);
4359 dependence_relations
->safe_push (ddr
);
4360 if (loop_nest
.exists ())
4361 compute_affine_dependence (ddr
, loop_nest
[0]);
4367 /* Describes a location of a memory reference. */
4369 typedef struct data_ref_loc_d
4371 /* The memory reference. */
4374 /* True if the memory reference is read. */
4379 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4380 true if STMT clobbers memory, false otherwise. */
4383 get_references_in_stmt (gimple stmt
, vec
<data_ref_loc
, va_heap
> *references
)
4385 bool clobbers_memory
= false;
4388 enum gimple_code stmt_code
= gimple_code (stmt
);
4390 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4391 As we cannot model data-references to not spelled out
4392 accesses give up if they may occur. */
4393 if (stmt_code
== GIMPLE_CALL
4394 && !(gimple_call_flags (stmt
) & ECF_CONST
))
4396 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4397 if (gimple_call_internal_p (stmt
))
4398 switch (gimple_call_internal_fn (stmt
))
4400 case IFN_GOMP_SIMD_LANE
:
4402 struct loop
*loop
= gimple_bb (stmt
)->loop_father
;
4403 tree uid
= gimple_call_arg (stmt
, 0);
4404 gcc_assert (TREE_CODE (uid
) == SSA_NAME
);
4406 || loop
->simduid
!= SSA_NAME_VAR (uid
))
4407 clobbers_memory
= true;
4411 case IFN_MASK_STORE
:
4414 clobbers_memory
= true;
4418 clobbers_memory
= true;
4420 else if (stmt_code
== GIMPLE_ASM
4421 && (gimple_asm_volatile_p (as_a
<gasm
*> (stmt
))
4422 || gimple_vuse (stmt
)))
4423 clobbers_memory
= true;
4425 if (!gimple_vuse (stmt
))
4426 return clobbers_memory
;
4428 if (stmt_code
== GIMPLE_ASSIGN
)
4431 op0
= gimple_assign_lhs (stmt
);
4432 op1
= gimple_assign_rhs1 (stmt
);
4435 || (REFERENCE_CLASS_P (op1
)
4436 && (base
= get_base_address (op1
))
4437 && TREE_CODE (base
) != SSA_NAME
))
4441 references
->safe_push (ref
);
4444 else if (stmt_code
== GIMPLE_CALL
)
4448 ref
.is_read
= false;
4449 if (gimple_call_internal_p (stmt
))
4450 switch (gimple_call_internal_fn (stmt
))
4453 if (gimple_call_lhs (stmt
) == NULL_TREE
)
4456 case IFN_MASK_STORE
:
4457 ref
.ref
= fold_build2 (MEM_REF
,
4459 ? TREE_TYPE (gimple_call_lhs (stmt
))
4460 : TREE_TYPE (gimple_call_arg (stmt
, 3)),
4461 gimple_call_arg (stmt
, 0),
4462 gimple_call_arg (stmt
, 1));
4463 references
->safe_push (ref
);
4469 op0
= gimple_call_lhs (stmt
);
4470 n
= gimple_call_num_args (stmt
);
4471 for (i
= 0; i
< n
; i
++)
4473 op1
= gimple_call_arg (stmt
, i
);
4476 || (REFERENCE_CLASS_P (op1
) && get_base_address (op1
)))
4480 references
->safe_push (ref
);
4485 return clobbers_memory
;
4489 || (REFERENCE_CLASS_P (op0
) && get_base_address (op0
))))
4492 ref
.is_read
= false;
4493 references
->safe_push (ref
);
4495 return clobbers_memory
;
4498 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4499 reference, returns false, otherwise returns true. NEST is the outermost
4500 loop of the loop nest in which the references should be analyzed. */
4503 find_data_references_in_stmt (struct loop
*nest
, gimple stmt
,
4504 vec
<data_reference_p
> *datarefs
)
4507 auto_vec
<data_ref_loc
, 2> references
;
4510 data_reference_p dr
;
4512 if (get_references_in_stmt (stmt
, &references
))
4515 FOR_EACH_VEC_ELT (references
, i
, ref
)
4517 dr
= create_data_ref (nest
, loop_containing_stmt (stmt
),
4518 ref
->ref
, stmt
, ref
->is_read
);
4519 gcc_assert (dr
!= NULL
);
4520 datarefs
->safe_push (dr
);
4522 references
.release ();
4526 /* Stores the data references in STMT to DATAREFS. If there is an
4527 unanalyzable reference, returns false, otherwise returns true.
4528 NEST is the outermost loop of the loop nest in which the references
4529 should be instantiated, LOOP is the loop in which the references
4530 should be analyzed. */
4533 graphite_find_data_references_in_stmt (loop_p nest
, loop_p loop
, gimple stmt
,
4534 vec
<data_reference_p
> *datarefs
)
4537 auto_vec
<data_ref_loc
, 2> references
;
4540 data_reference_p dr
;
4542 if (get_references_in_stmt (stmt
, &references
))
4545 FOR_EACH_VEC_ELT (references
, i
, ref
)
4547 dr
= create_data_ref (nest
, loop
, ref
->ref
, stmt
, ref
->is_read
);
4548 gcc_assert (dr
!= NULL
);
4549 datarefs
->safe_push (dr
);
4552 references
.release ();
4556 /* Search the data references in LOOP, and record the information into
4557 DATAREFS. Returns chrec_dont_know when failing to analyze a
4558 difficult case, returns NULL_TREE otherwise. */
4561 find_data_references_in_bb (struct loop
*loop
, basic_block bb
,
4562 vec
<data_reference_p
> *datarefs
)
4564 gimple_stmt_iterator bsi
;
4566 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4568 gimple stmt
= gsi_stmt (bsi
);
4570 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
4572 struct data_reference
*res
;
4573 res
= XCNEW (struct data_reference
);
4574 datarefs
->safe_push (res
);
4576 return chrec_dont_know
;
4583 /* Search the data references in LOOP, and record the information into
4584 DATAREFS. Returns chrec_dont_know when failing to analyze a
4585 difficult case, returns NULL_TREE otherwise.
4587 TODO: This function should be made smarter so that it can handle address
4588 arithmetic as if they were array accesses, etc. */
4591 find_data_references_in_loop (struct loop
*loop
,
4592 vec
<data_reference_p
> *datarefs
)
4594 basic_block bb
, *bbs
;
4597 bbs
= get_loop_body_in_dom_order (loop
);
4599 for (i
= 0; i
< loop
->num_nodes
; i
++)
4603 if (find_data_references_in_bb (loop
, bb
, datarefs
) == chrec_dont_know
)
4606 return chrec_dont_know
;
4614 /* Recursive helper function. */
4617 find_loop_nest_1 (struct loop
*loop
, vec
<loop_p
> *loop_nest
)
4619 /* Inner loops of the nest should not contain siblings. Example:
4620 when there are two consecutive loops,
4631 the dependence relation cannot be captured by the distance
4636 loop_nest
->safe_push (loop
);
4638 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4642 /* Return false when the LOOP is not well nested. Otherwise return
4643 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4644 contain the loops from the outermost to the innermost, as they will
4645 appear in the classic distance vector. */
4648 find_loop_nest (struct loop
*loop
, vec
<loop_p
> *loop_nest
)
4650 loop_nest
->safe_push (loop
);
4652 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4656 /* Returns true when the data dependences have been computed, false otherwise.
4657 Given a loop nest LOOP, the following vectors are returned:
4658 DATAREFS is initialized to all the array elements contained in this loop,
4659 DEPENDENCE_RELATIONS contains the relations between the data references.
4660 Compute read-read and self relations if
4661 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4664 compute_data_dependences_for_loop (struct loop
*loop
,
4665 bool compute_self_and_read_read_dependences
,
4666 vec
<loop_p
> *loop_nest
,
4667 vec
<data_reference_p
> *datarefs
,
4668 vec
<ddr_p
> *dependence_relations
)
4672 memset (&dependence_stats
, 0, sizeof (dependence_stats
));
4674 /* If the loop nest is not well formed, or one of the data references
4675 is not computable, give up without spending time to compute other
4678 || !find_loop_nest (loop
, loop_nest
)
4679 || find_data_references_in_loop (loop
, datarefs
) == chrec_dont_know
4680 || !compute_all_dependences (*datarefs
, dependence_relations
, *loop_nest
,
4681 compute_self_and_read_read_dependences
))
4684 if (dump_file
&& (dump_flags
& TDF_STATS
))
4686 fprintf (dump_file
, "Dependence tester statistics:\n");
4688 fprintf (dump_file
, "Number of dependence tests: %d\n",
4689 dependence_stats
.num_dependence_tests
);
4690 fprintf (dump_file
, "Number of dependence tests classified dependent: %d\n",
4691 dependence_stats
.num_dependence_dependent
);
4692 fprintf (dump_file
, "Number of dependence tests classified independent: %d\n",
4693 dependence_stats
.num_dependence_independent
);
4694 fprintf (dump_file
, "Number of undetermined dependence tests: %d\n",
4695 dependence_stats
.num_dependence_undetermined
);
4697 fprintf (dump_file
, "Number of subscript tests: %d\n",
4698 dependence_stats
.num_subscript_tests
);
4699 fprintf (dump_file
, "Number of undetermined subscript tests: %d\n",
4700 dependence_stats
.num_subscript_undetermined
);
4701 fprintf (dump_file
, "Number of same subscript function: %d\n",
4702 dependence_stats
.num_same_subscript_function
);
4704 fprintf (dump_file
, "Number of ziv tests: %d\n",
4705 dependence_stats
.num_ziv
);
4706 fprintf (dump_file
, "Number of ziv tests returning dependent: %d\n",
4707 dependence_stats
.num_ziv_dependent
);
4708 fprintf (dump_file
, "Number of ziv tests returning independent: %d\n",
4709 dependence_stats
.num_ziv_independent
);
4710 fprintf (dump_file
, "Number of ziv tests unimplemented: %d\n",
4711 dependence_stats
.num_ziv_unimplemented
);
4713 fprintf (dump_file
, "Number of siv tests: %d\n",
4714 dependence_stats
.num_siv
);
4715 fprintf (dump_file
, "Number of siv tests returning dependent: %d\n",
4716 dependence_stats
.num_siv_dependent
);
4717 fprintf (dump_file
, "Number of siv tests returning independent: %d\n",
4718 dependence_stats
.num_siv_independent
);
4719 fprintf (dump_file
, "Number of siv tests unimplemented: %d\n",
4720 dependence_stats
.num_siv_unimplemented
);
4722 fprintf (dump_file
, "Number of miv tests: %d\n",
4723 dependence_stats
.num_miv
);
4724 fprintf (dump_file
, "Number of miv tests returning dependent: %d\n",
4725 dependence_stats
.num_miv_dependent
);
4726 fprintf (dump_file
, "Number of miv tests returning independent: %d\n",
4727 dependence_stats
.num_miv_independent
);
4728 fprintf (dump_file
, "Number of miv tests unimplemented: %d\n",
4729 dependence_stats
.num_miv_unimplemented
);
4735 /* Returns true when the data dependences for the basic block BB have been
4736 computed, false otherwise.
4737 DATAREFS is initialized to all the array elements contained in this basic
4738 block, DEPENDENCE_RELATIONS contains the relations between the data
4739 references. Compute read-read and self relations if
4740 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4742 compute_data_dependences_for_bb (basic_block bb
,
4743 bool compute_self_and_read_read_dependences
,
4744 vec
<data_reference_p
> *datarefs
,
4745 vec
<ddr_p
> *dependence_relations
)
4747 if (find_data_references_in_bb (NULL
, bb
, datarefs
) == chrec_dont_know
)
4750 return compute_all_dependences (*datarefs
, dependence_relations
, vNULL
,
4751 compute_self_and_read_read_dependences
);
4754 /* Entry point (for testing only). Analyze all the data references
4755 and the dependence relations in LOOP.
4757 The data references are computed first.
4759 A relation on these nodes is represented by a complete graph. Some
4760 of the relations could be of no interest, thus the relations can be
4763 In the following function we compute all the relations. This is
4764 just a first implementation that is here for:
4765 - for showing how to ask for the dependence relations,
4766 - for the debugging the whole dependence graph,
4767 - for the dejagnu testcases and maintenance.
4769 It is possible to ask only for a part of the graph, avoiding to
4770 compute the whole dependence graph. The computed dependences are
4771 stored in a knowledge base (KB) such that later queries don't
4772 recompute the same information. The implementation of this KB is
4773 transparent to the optimizer, and thus the KB can be changed with a
4774 more efficient implementation, or the KB could be disabled. */
4776 analyze_all_data_dependences (struct loop
*loop
)
4779 int nb_data_refs
= 10;
4780 vec
<data_reference_p
> datarefs
;
4781 datarefs
.create (nb_data_refs
);
4782 vec
<ddr_p
> dependence_relations
;
4783 dependence_relations
.create (nb_data_refs
* nb_data_refs
);
4784 vec
<loop_p
> loop_nest
;
4785 loop_nest
.create (3);
4787 /* Compute DDs on the whole function. */
4788 compute_data_dependences_for_loop (loop
, false, &loop_nest
, &datarefs
,
4789 &dependence_relations
);
4793 dump_data_dependence_relations (dump_file
, dependence_relations
);
4794 fprintf (dump_file
, "\n\n");
4796 if (dump_flags
& TDF_DETAILS
)
4797 dump_dist_dir_vectors (dump_file
, dependence_relations
);
4799 if (dump_flags
& TDF_STATS
)
4801 unsigned nb_top_relations
= 0;
4802 unsigned nb_bot_relations
= 0;
4803 unsigned nb_chrec_relations
= 0;
4804 struct data_dependence_relation
*ddr
;
4806 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
4808 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr
)))
4811 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
4815 nb_chrec_relations
++;
4818 gather_stats_on_scev_database ();
4822 loop_nest
.release ();
4823 free_dependence_relations (dependence_relations
);
4824 free_data_refs (datarefs
);
4827 /* Computes all the data dependences and check that the results of
4828 several analyzers are the same. */
4831 tree_check_data_deps (void)
4833 struct loop
*loop_nest
;
4835 FOR_EACH_LOOP (loop_nest
, 0)
4836 analyze_all_data_dependences (loop_nest
);
4839 /* Free the memory used by a data dependence relation DDR. */
4842 free_dependence_relation (struct data_dependence_relation
*ddr
)
4847 if (DDR_SUBSCRIPTS (ddr
).exists ())
4848 free_subscripts (DDR_SUBSCRIPTS (ddr
));
4849 DDR_DIST_VECTS (ddr
).release ();
4850 DDR_DIR_VECTS (ddr
).release ();
4855 /* Free the memory used by the data dependence relations from
4856 DEPENDENCE_RELATIONS. */
4859 free_dependence_relations (vec
<ddr_p
> dependence_relations
)
4862 struct data_dependence_relation
*ddr
;
4864 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
4866 free_dependence_relation (ddr
);
4868 dependence_relations
.release ();
4871 /* Free the memory used by the data references from DATAREFS. */
4874 free_data_refs (vec
<data_reference_p
> datarefs
)
4877 struct data_reference
*dr
;
4879 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
4881 datarefs
.release ();