1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
49 - to define an interface to access this data.
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
62 has an integer solution x = 1 and y = -1.
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
78 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
82 #include "gimple-iterator.h"
83 #include "tree-ssa-loop-niter.h"
84 #include "tree-ssa-loop.h"
87 #include "tree-data-ref.h"
88 #include "tree-scalar-evolution.h"
90 #include "langhooks.h"
91 #include "tree-affine.h"
94 static struct datadep_stats
96 int num_dependence_tests
;
97 int num_dependence_dependent
;
98 int num_dependence_independent
;
99 int num_dependence_undetermined
;
101 int num_subscript_tests
;
102 int num_subscript_undetermined
;
103 int num_same_subscript_function
;
106 int num_ziv_independent
;
107 int num_ziv_dependent
;
108 int num_ziv_unimplemented
;
111 int num_siv_independent
;
112 int num_siv_dependent
;
113 int num_siv_unimplemented
;
116 int num_miv_independent
;
117 int num_miv_dependent
;
118 int num_miv_unimplemented
;
121 static bool subscript_dependence_tester_1 (struct data_dependence_relation
*,
122 struct data_reference
*,
123 struct data_reference
*,
125 /* Returns true iff A divides B. */
128 tree_fold_divides_p (const_tree a
, const_tree b
)
130 gcc_assert (TREE_CODE (a
) == INTEGER_CST
);
131 gcc_assert (TREE_CODE (b
) == INTEGER_CST
);
132 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR
, b
, a
));
135 /* Returns true iff A divides B. */
138 int_divides_p (int a
, int b
)
140 return ((b
% a
) == 0);
145 /* Dump into FILE all the data references from DATAREFS. */
148 dump_data_references (FILE *file
, vec
<data_reference_p
> datarefs
)
151 struct data_reference
*dr
;
153 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
154 dump_data_reference (file
, dr
);
157 /* Unified dump into FILE all the data references from DATAREFS. */
160 debug (vec
<data_reference_p
> &ref
)
162 dump_data_references (stderr
, ref
);
166 debug (vec
<data_reference_p
> *ptr
)
171 fprintf (stderr
, "<nil>\n");
175 /* Dump into STDERR all the data references from DATAREFS. */
178 debug_data_references (vec
<data_reference_p
> datarefs
)
180 dump_data_references (stderr
, datarefs
);
183 /* Print to STDERR the data_reference DR. */
186 debug_data_reference (struct data_reference
*dr
)
188 dump_data_reference (stderr
, dr
);
191 /* Dump function for a DATA_REFERENCE structure. */
194 dump_data_reference (FILE *outf
,
195 struct data_reference
*dr
)
199 fprintf (outf
, "#(Data Ref: \n");
200 fprintf (outf
, "# bb: %d \n", gimple_bb (DR_STMT (dr
))->index
);
201 fprintf (outf
, "# stmt: ");
202 print_gimple_stmt (outf
, DR_STMT (dr
), 0, 0);
203 fprintf (outf
, "# ref: ");
204 print_generic_stmt (outf
, DR_REF (dr
), 0);
205 fprintf (outf
, "# base_object: ");
206 print_generic_stmt (outf
, DR_BASE_OBJECT (dr
), 0);
208 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
210 fprintf (outf
, "# Access function %d: ", i
);
211 print_generic_stmt (outf
, DR_ACCESS_FN (dr
, i
), 0);
213 fprintf (outf
, "#)\n");
216 /* Unified dump function for a DATA_REFERENCE structure. */
219 debug (data_reference
&ref
)
221 dump_data_reference (stderr
, &ref
);
225 debug (data_reference
*ptr
)
230 fprintf (stderr
, "<nil>\n");
234 /* Dumps the affine function described by FN to the file OUTF. */
237 dump_affine_function (FILE *outf
, affine_fn fn
)
242 print_generic_expr (outf
, fn
[0], TDF_SLIM
);
243 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
245 fprintf (outf
, " + ");
246 print_generic_expr (outf
, coef
, TDF_SLIM
);
247 fprintf (outf
, " * x_%u", i
);
251 /* Dumps the conflict function CF to the file OUTF. */
254 dump_conflict_function (FILE *outf
, conflict_function
*cf
)
258 if (cf
->n
== NO_DEPENDENCE
)
259 fprintf (outf
, "no dependence");
260 else if (cf
->n
== NOT_KNOWN
)
261 fprintf (outf
, "not known");
264 for (i
= 0; i
< cf
->n
; i
++)
269 dump_affine_function (outf
, cf
->fns
[i
]);
275 /* Dump function for a SUBSCRIPT structure. */
278 dump_subscript (FILE *outf
, struct subscript
*subscript
)
280 conflict_function
*cf
= SUB_CONFLICTS_IN_A (subscript
);
282 fprintf (outf
, "\n (subscript \n");
283 fprintf (outf
, " iterations_that_access_an_element_twice_in_A: ");
284 dump_conflict_function (outf
, cf
);
285 if (CF_NONTRIVIAL_P (cf
))
287 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
288 fprintf (outf
, "\n last_conflict: ");
289 print_generic_expr (outf
, last_iteration
, 0);
292 cf
= SUB_CONFLICTS_IN_B (subscript
);
293 fprintf (outf
, "\n iterations_that_access_an_element_twice_in_B: ");
294 dump_conflict_function (outf
, cf
);
295 if (CF_NONTRIVIAL_P (cf
))
297 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
298 fprintf (outf
, "\n last_conflict: ");
299 print_generic_expr (outf
, last_iteration
, 0);
302 fprintf (outf
, "\n (Subscript distance: ");
303 print_generic_expr (outf
, SUB_DISTANCE (subscript
), 0);
304 fprintf (outf
, " ))\n");
307 /* Print the classic direction vector DIRV to OUTF. */
310 print_direction_vector (FILE *outf
,
316 for (eq
= 0; eq
< length
; eq
++)
318 enum data_dependence_direction dir
= ((enum data_dependence_direction
)
324 fprintf (outf
, " +");
327 fprintf (outf
, " -");
330 fprintf (outf
, " =");
332 case dir_positive_or_equal
:
333 fprintf (outf
, " +=");
335 case dir_positive_or_negative
:
336 fprintf (outf
, " +-");
338 case dir_negative_or_equal
:
339 fprintf (outf
, " -=");
342 fprintf (outf
, " *");
345 fprintf (outf
, "indep");
349 fprintf (outf
, "\n");
352 /* Print a vector of direction vectors. */
355 print_dir_vectors (FILE *outf
, vec
<lambda_vector
> dir_vects
,
361 FOR_EACH_VEC_ELT (dir_vects
, j
, v
)
362 print_direction_vector (outf
, v
, length
);
365 /* Print out a vector VEC of length N to OUTFILE. */
368 print_lambda_vector (FILE * outfile
, lambda_vector vector
, int n
)
372 for (i
= 0; i
< n
; i
++)
373 fprintf (outfile
, "%3d ", vector
[i
]);
374 fprintf (outfile
, "\n");
377 /* Print a vector of distance vectors. */
380 print_dist_vectors (FILE *outf
, vec
<lambda_vector
> dist_vects
,
386 FOR_EACH_VEC_ELT (dist_vects
, j
, v
)
387 print_lambda_vector (outf
, v
, length
);
390 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
393 dump_data_dependence_relation (FILE *outf
,
394 struct data_dependence_relation
*ddr
)
396 struct data_reference
*dra
, *drb
;
398 fprintf (outf
, "(Data Dep: \n");
400 if (!ddr
|| DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
407 dump_data_reference (outf
, dra
);
409 fprintf (outf
, " (nil)\n");
411 dump_data_reference (outf
, drb
);
413 fprintf (outf
, " (nil)\n");
415 fprintf (outf
, " (don't know)\n)\n");
421 dump_data_reference (outf
, dra
);
422 dump_data_reference (outf
, drb
);
424 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
425 fprintf (outf
, " (no dependence)\n");
427 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
432 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
434 fprintf (outf
, " access_fn_A: ");
435 print_generic_stmt (outf
, DR_ACCESS_FN (dra
, i
), 0);
436 fprintf (outf
, " access_fn_B: ");
437 print_generic_stmt (outf
, DR_ACCESS_FN (drb
, i
), 0);
438 dump_subscript (outf
, DDR_SUBSCRIPT (ddr
, i
));
441 fprintf (outf
, " inner loop index: %d\n", DDR_INNER_LOOP (ddr
));
442 fprintf (outf
, " loop nest: (");
443 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr
), i
, loopi
)
444 fprintf (outf
, "%d ", loopi
->num
);
445 fprintf (outf
, ")\n");
447 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
449 fprintf (outf
, " distance_vector: ");
450 print_lambda_vector (outf
, DDR_DIST_VECT (ddr
, i
),
454 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
456 fprintf (outf
, " direction_vector: ");
457 print_direction_vector (outf
, DDR_DIR_VECT (ddr
, i
),
462 fprintf (outf
, ")\n");
468 debug_data_dependence_relation (struct data_dependence_relation
*ddr
)
470 dump_data_dependence_relation (stderr
, ddr
);
473 /* Dump into FILE all the dependence relations from DDRS. */
476 dump_data_dependence_relations (FILE *file
,
480 struct data_dependence_relation
*ddr
;
482 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
483 dump_data_dependence_relation (file
, ddr
);
487 debug (vec
<ddr_p
> &ref
)
489 dump_data_dependence_relations (stderr
, ref
);
493 debug (vec
<ddr_p
> *ptr
)
498 fprintf (stderr
, "<nil>\n");
502 /* Dump to STDERR all the dependence relations from DDRS. */
505 debug_data_dependence_relations (vec
<ddr_p
> ddrs
)
507 dump_data_dependence_relations (stderr
, ddrs
);
510 /* Dumps the distance and direction vectors in FILE. DDRS contains
511 the dependence relations, and VECT_SIZE is the size of the
512 dependence vectors, or in other words the number of loops in the
516 dump_dist_dir_vectors (FILE *file
, vec
<ddr_p
> ddrs
)
519 struct data_dependence_relation
*ddr
;
522 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
523 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
&& DDR_AFFINE_P (ddr
))
525 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), j
, v
)
527 fprintf (file
, "DISTANCE_V (");
528 print_lambda_vector (file
, v
, DDR_NB_LOOPS (ddr
));
529 fprintf (file
, ")\n");
532 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), j
, v
)
534 fprintf (file
, "DIRECTION_V (");
535 print_direction_vector (file
, v
, DDR_NB_LOOPS (ddr
));
536 fprintf (file
, ")\n");
540 fprintf (file
, "\n\n");
543 /* Dumps the data dependence relations DDRS in FILE. */
546 dump_ddrs (FILE *file
, vec
<ddr_p
> ddrs
)
549 struct data_dependence_relation
*ddr
;
551 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
552 dump_data_dependence_relation (file
, ddr
);
554 fprintf (file
, "\n\n");
558 debug_ddrs (vec
<ddr_p
> ddrs
)
560 dump_ddrs (stderr
, ddrs
);
563 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
564 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
565 constant of type ssizetype, and returns true. If we cannot do this
566 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
570 split_constant_offset_1 (tree type
, tree op0
, enum tree_code code
, tree op1
,
571 tree
*var
, tree
*off
)
575 enum tree_code ocode
= code
;
583 *var
= build_int_cst (type
, 0);
584 *off
= fold_convert (ssizetype
, op0
);
587 case POINTER_PLUS_EXPR
:
592 split_constant_offset (op0
, &var0
, &off0
);
593 split_constant_offset (op1
, &var1
, &off1
);
594 *var
= fold_build2 (code
, type
, var0
, var1
);
595 *off
= size_binop (ocode
, off0
, off1
);
599 if (TREE_CODE (op1
) != INTEGER_CST
)
602 split_constant_offset (op0
, &var0
, &off0
);
603 *var
= fold_build2 (MULT_EXPR
, type
, var0
, op1
);
604 *off
= size_binop (MULT_EXPR
, off0
, fold_convert (ssizetype
, op1
));
610 HOST_WIDE_INT pbitsize
, pbitpos
;
611 enum machine_mode pmode
;
612 int punsignedp
, pvolatilep
;
614 op0
= TREE_OPERAND (op0
, 0);
615 base
= get_inner_reference (op0
, &pbitsize
, &pbitpos
, &poffset
,
616 &pmode
, &punsignedp
, &pvolatilep
, false);
618 if (pbitpos
% BITS_PER_UNIT
!= 0)
620 base
= build_fold_addr_expr (base
);
621 off0
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
625 split_constant_offset (poffset
, &poffset
, &off1
);
626 off0
= size_binop (PLUS_EXPR
, off0
, off1
);
627 if (POINTER_TYPE_P (TREE_TYPE (base
)))
628 base
= fold_build_pointer_plus (base
, poffset
);
630 base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base
), base
,
631 fold_convert (TREE_TYPE (base
), poffset
));
634 var0
= fold_convert (type
, base
);
636 /* If variable length types are involved, punt, otherwise casts
637 might be converted into ARRAY_REFs in gimplify_conversion.
638 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
639 possibly no longer appears in current GIMPLE, might resurface.
640 This perhaps could run
641 if (CONVERT_EXPR_P (var0))
643 gimplify_conversion (&var0);
644 // Attempt to fill in any within var0 found ARRAY_REF's
645 // element size from corresponding op embedded ARRAY_REF,
646 // if unsuccessful, just punt.
648 while (POINTER_TYPE_P (type
))
649 type
= TREE_TYPE (type
);
650 if (int_size_in_bytes (type
) < 0)
660 gimple def_stmt
= SSA_NAME_DEF_STMT (op0
);
661 enum tree_code subcode
;
663 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
666 var0
= gimple_assign_rhs1 (def_stmt
);
667 subcode
= gimple_assign_rhs_code (def_stmt
);
668 var1
= gimple_assign_rhs2 (def_stmt
);
670 return split_constant_offset_1 (type
, var0
, subcode
, var1
, var
, off
);
674 /* We must not introduce undefined overflow, and we must not change the value.
675 Hence we're okay if the inner type doesn't overflow to start with
676 (pointer or signed), the outer type also is an integer or pointer
677 and the outer precision is at least as large as the inner. */
678 tree itype
= TREE_TYPE (op0
);
679 if ((POINTER_TYPE_P (itype
)
680 || (INTEGRAL_TYPE_P (itype
) && TYPE_OVERFLOW_UNDEFINED (itype
)))
681 && TYPE_PRECISION (type
) >= TYPE_PRECISION (itype
)
682 && (POINTER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)))
684 split_constant_offset (op0
, &var0
, off
);
685 *var
= fold_convert (type
, var0
);
696 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
697 will be ssizetype. */
700 split_constant_offset (tree exp
, tree
*var
, tree
*off
)
702 tree type
= TREE_TYPE (exp
), otype
, op0
, op1
, e
, o
;
706 *off
= ssize_int (0);
709 if (tree_is_chrec (exp
)
710 || get_gimple_rhs_class (TREE_CODE (exp
)) == GIMPLE_TERNARY_RHS
)
713 otype
= TREE_TYPE (exp
);
714 code
= TREE_CODE (exp
);
715 extract_ops_from_tree (exp
, &code
, &op0
, &op1
);
716 if (split_constant_offset_1 (otype
, op0
, code
, op1
, &e
, &o
))
718 *var
= fold_convert (type
, e
);
723 /* Returns the address ADDR of an object in a canonical shape (without nop
724 casts, and with type of pointer to the object). */
727 canonicalize_base_object_address (tree addr
)
733 /* The base address may be obtained by casting from integer, in that case
735 if (!POINTER_TYPE_P (TREE_TYPE (addr
)))
738 if (TREE_CODE (addr
) != ADDR_EXPR
)
741 return build_fold_addr_expr (TREE_OPERAND (addr
, 0));
744 /* Analyzes the behavior of the memory reference DR in the innermost loop or
745 basic block that contains it. Returns true if analysis succeed or false
749 dr_analyze_innermost (struct data_reference
*dr
, struct loop
*nest
)
751 gimple stmt
= DR_STMT (dr
);
752 struct loop
*loop
= loop_containing_stmt (stmt
);
753 tree ref
= DR_REF (dr
);
754 HOST_WIDE_INT pbitsize
, pbitpos
;
756 enum machine_mode pmode
;
757 int punsignedp
, pvolatilep
;
758 affine_iv base_iv
, offset_iv
;
759 tree init
, dinit
, step
;
760 bool in_loop
= (loop
&& loop
->num
);
762 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
763 fprintf (dump_file
, "analyze_innermost: ");
765 base
= get_inner_reference (ref
, &pbitsize
, &pbitpos
, &poffset
,
766 &pmode
, &punsignedp
, &pvolatilep
, false);
767 gcc_assert (base
!= NULL_TREE
);
769 if (pbitpos
% BITS_PER_UNIT
!= 0)
771 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
772 fprintf (dump_file
, "failed: bit offset alignment.\n");
776 if (TREE_CODE (base
) == MEM_REF
)
778 if (!integer_zerop (TREE_OPERAND (base
, 1)))
780 double_int moff
= mem_ref_offset (base
);
781 tree mofft
= double_int_to_tree (sizetype
, moff
);
785 poffset
= size_binop (PLUS_EXPR
, poffset
, mofft
);
787 base
= TREE_OPERAND (base
, 0);
790 base
= build_fold_addr_expr (base
);
794 if (!simple_iv (loop
, loop_containing_stmt (stmt
), base
, &base_iv
,
795 nest
? true : false))
799 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
800 fprintf (dump_file
, "failed: evolution of base is not"
807 base_iv
.step
= ssize_int (0);
808 base_iv
.no_overflow
= true;
815 base_iv
.step
= ssize_int (0);
816 base_iv
.no_overflow
= true;
821 offset_iv
.base
= ssize_int (0);
822 offset_iv
.step
= ssize_int (0);
828 offset_iv
.base
= poffset
;
829 offset_iv
.step
= ssize_int (0);
831 else if (!simple_iv (loop
, loop_containing_stmt (stmt
),
833 nest
? true : false))
837 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
838 fprintf (dump_file
, "failed: evolution of offset is not"
844 offset_iv
.base
= poffset
;
845 offset_iv
.step
= ssize_int (0);
850 init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
851 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
852 init
= size_binop (PLUS_EXPR
, init
, dinit
);
853 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
854 init
= size_binop (PLUS_EXPR
, init
, dinit
);
856 step
= size_binop (PLUS_EXPR
,
857 fold_convert (ssizetype
, base_iv
.step
),
858 fold_convert (ssizetype
, offset_iv
.step
));
860 DR_BASE_ADDRESS (dr
) = canonicalize_base_object_address (base_iv
.base
);
862 DR_OFFSET (dr
) = fold_convert (ssizetype
, offset_iv
.base
);
866 DR_ALIGNED_TO (dr
) = size_int (highest_pow2_factor (offset_iv
.base
));
868 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
869 fprintf (dump_file
, "success.\n");
874 /* Determines the base object and the list of indices of memory reference
875 DR, analyzed in LOOP and instantiated in loop nest NEST. */
878 dr_analyze_indices (struct data_reference
*dr
, loop_p nest
, loop_p loop
)
880 vec
<tree
> access_fns
= vNULL
;
882 tree base
, off
, access_fn
;
883 basic_block before_loop
;
885 /* If analyzing a basic-block there are no indices to analyze
886 and thus no access functions. */
889 DR_BASE_OBJECT (dr
) = DR_REF (dr
);
890 DR_ACCESS_FNS (dr
).create (0);
895 before_loop
= block_before_loop (nest
);
897 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
898 into a two element array with a constant index. The base is
899 then just the immediate underlying object. */
900 if (TREE_CODE (ref
) == REALPART_EXPR
)
902 ref
= TREE_OPERAND (ref
, 0);
903 access_fns
.safe_push (integer_zero_node
);
905 else if (TREE_CODE (ref
) == IMAGPART_EXPR
)
907 ref
= TREE_OPERAND (ref
, 0);
908 access_fns
.safe_push (integer_one_node
);
911 /* Analyze access functions of dimensions we know to be independent. */
912 while (handled_component_p (ref
))
914 if (TREE_CODE (ref
) == ARRAY_REF
)
916 op
= TREE_OPERAND (ref
, 1);
917 access_fn
= analyze_scalar_evolution (loop
, op
);
918 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
919 access_fns
.safe_push (access_fn
);
921 else if (TREE_CODE (ref
) == COMPONENT_REF
922 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref
, 0))) == RECORD_TYPE
)
924 /* For COMPONENT_REFs of records (but not unions!) use the
925 FIELD_DECL offset as constant access function so we can
926 disambiguate a[i].f1 and a[i].f2. */
927 tree off
= component_ref_field_offset (ref
);
928 off
= size_binop (PLUS_EXPR
,
929 size_binop (MULT_EXPR
,
930 fold_convert (bitsizetype
, off
),
931 bitsize_int (BITS_PER_UNIT
)),
932 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref
, 1)));
933 access_fns
.safe_push (off
);
936 /* If we have an unhandled component we could not translate
937 to an access function stop analyzing. We have determined
938 our base object in this case. */
941 ref
= TREE_OPERAND (ref
, 0);
944 /* If the address operand of a MEM_REF base has an evolution in the
945 analyzed nest, add it as an additional independent access-function. */
946 if (TREE_CODE (ref
) == MEM_REF
)
948 op
= TREE_OPERAND (ref
, 0);
949 access_fn
= analyze_scalar_evolution (loop
, op
);
950 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
951 if (TREE_CODE (access_fn
) == POLYNOMIAL_CHREC
)
954 tree memoff
= TREE_OPERAND (ref
, 1);
955 base
= initial_condition (access_fn
);
956 orig_type
= TREE_TYPE (base
);
957 STRIP_USELESS_TYPE_CONVERSION (base
);
958 split_constant_offset (base
, &base
, &off
);
959 /* Fold the MEM_REF offset into the evolutions initial
960 value to make more bases comparable. */
961 if (!integer_zerop (memoff
))
963 off
= size_binop (PLUS_EXPR
, off
,
964 fold_convert (ssizetype
, memoff
));
965 memoff
= build_int_cst (TREE_TYPE (memoff
), 0);
967 access_fn
= chrec_replace_initial_condition
968 (access_fn
, fold_convert (orig_type
, off
));
969 /* ??? This is still not a suitable base object for
970 dr_may_alias_p - the base object needs to be an
971 access that covers the object as whole. With
972 an evolution in the pointer this cannot be
974 As a band-aid, mark the access so we can special-case
975 it in dr_may_alias_p. */
976 ref
= fold_build2_loc (EXPR_LOCATION (ref
),
977 MEM_REF
, TREE_TYPE (ref
),
979 DR_UNCONSTRAINED_BASE (dr
) = true;
980 access_fns
.safe_push (access_fn
);
983 else if (DECL_P (ref
))
985 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
986 ref
= build2 (MEM_REF
, TREE_TYPE (ref
),
987 build_fold_addr_expr (ref
),
988 build_int_cst (reference_alias_ptr_type (ref
), 0));
991 DR_BASE_OBJECT (dr
) = ref
;
992 DR_ACCESS_FNS (dr
) = access_fns
;
995 /* Extracts the alias analysis information from the memory reference DR. */
998 dr_analyze_alias (struct data_reference
*dr
)
1000 tree ref
= DR_REF (dr
);
1001 tree base
= get_base_address (ref
), addr
;
1003 if (INDIRECT_REF_P (base
)
1004 || TREE_CODE (base
) == MEM_REF
)
1006 addr
= TREE_OPERAND (base
, 0);
1007 if (TREE_CODE (addr
) == SSA_NAME
)
1008 DR_PTR_INFO (dr
) = SSA_NAME_PTR_INFO (addr
);
1012 /* Frees data reference DR. */
1015 free_data_ref (data_reference_p dr
)
1017 DR_ACCESS_FNS (dr
).release ();
1021 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1022 is read if IS_READ is true, write otherwise. Returns the
1023 data_reference description of MEMREF. NEST is the outermost loop
1024 in which the reference should be instantiated, LOOP is the loop in
1025 which the data reference should be analyzed. */
1027 struct data_reference
*
1028 create_data_ref (loop_p nest
, loop_p loop
, tree memref
, gimple stmt
,
1031 struct data_reference
*dr
;
1033 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1035 fprintf (dump_file
, "Creating dr for ");
1036 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
1037 fprintf (dump_file
, "\n");
1040 dr
= XCNEW (struct data_reference
);
1041 DR_STMT (dr
) = stmt
;
1042 DR_REF (dr
) = memref
;
1043 DR_IS_READ (dr
) = is_read
;
1045 dr_analyze_innermost (dr
, nest
);
1046 dr_analyze_indices (dr
, nest
, loop
);
1047 dr_analyze_alias (dr
);
1049 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1052 fprintf (dump_file
, "\tbase_address: ");
1053 print_generic_expr (dump_file
, DR_BASE_ADDRESS (dr
), TDF_SLIM
);
1054 fprintf (dump_file
, "\n\toffset from base address: ");
1055 print_generic_expr (dump_file
, DR_OFFSET (dr
), TDF_SLIM
);
1056 fprintf (dump_file
, "\n\tconstant offset from base address: ");
1057 print_generic_expr (dump_file
, DR_INIT (dr
), TDF_SLIM
);
1058 fprintf (dump_file
, "\n\tstep: ");
1059 print_generic_expr (dump_file
, DR_STEP (dr
), TDF_SLIM
);
1060 fprintf (dump_file
, "\n\taligned to: ");
1061 print_generic_expr (dump_file
, DR_ALIGNED_TO (dr
), TDF_SLIM
);
1062 fprintf (dump_file
, "\n\tbase_object: ");
1063 print_generic_expr (dump_file
, DR_BASE_OBJECT (dr
), TDF_SLIM
);
1064 fprintf (dump_file
, "\n");
1065 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
1067 fprintf (dump_file
, "\tAccess function %d: ", i
);
1068 print_generic_stmt (dump_file
, DR_ACCESS_FN (dr
, i
), TDF_SLIM
);
1075 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1078 dr_equal_offsets_p1 (tree offset1
, tree offset2
)
1082 STRIP_NOPS (offset1
);
1083 STRIP_NOPS (offset2
);
1085 if (offset1
== offset2
)
1088 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
1089 || (!BINARY_CLASS_P (offset1
) && !UNARY_CLASS_P (offset1
)))
1092 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 0),
1093 TREE_OPERAND (offset2
, 0));
1095 if (!res
|| !BINARY_CLASS_P (offset1
))
1098 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 1),
1099 TREE_OPERAND (offset2
, 1));
1104 /* Check if DRA and DRB have equal offsets. */
1106 dr_equal_offsets_p (struct data_reference
*dra
,
1107 struct data_reference
*drb
)
1109 tree offset1
, offset2
;
1111 offset1
= DR_OFFSET (dra
);
1112 offset2
= DR_OFFSET (drb
);
1114 return dr_equal_offsets_p1 (offset1
, offset2
);
1117 /* Returns true if FNA == FNB. */
1120 affine_function_equal_p (affine_fn fna
, affine_fn fnb
)
1122 unsigned i
, n
= fna
.length ();
1124 if (n
!= fnb
.length ())
1127 for (i
= 0; i
< n
; i
++)
1128 if (!operand_equal_p (fna
[i
], fnb
[i
], 0))
1134 /* If all the functions in CF are the same, returns one of them,
1135 otherwise returns NULL. */
1138 common_affine_function (conflict_function
*cf
)
1143 if (!CF_NONTRIVIAL_P (cf
))
1144 return affine_fn ();
1148 for (i
= 1; i
< cf
->n
; i
++)
1149 if (!affine_function_equal_p (comm
, cf
->fns
[i
]))
1150 return affine_fn ();
1155 /* Returns the base of the affine function FN. */
1158 affine_function_base (affine_fn fn
)
1163 /* Returns true if FN is a constant. */
1166 affine_function_constant_p (affine_fn fn
)
1171 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
1172 if (!integer_zerop (coef
))
1178 /* Returns true if FN is the zero constant function. */
1181 affine_function_zero_p (affine_fn fn
)
1183 return (integer_zerop (affine_function_base (fn
))
1184 && affine_function_constant_p (fn
));
1187 /* Returns a signed integer type with the largest precision from TA
1191 signed_type_for_types (tree ta
, tree tb
)
1193 if (TYPE_PRECISION (ta
) > TYPE_PRECISION (tb
))
1194 return signed_type_for (ta
);
1196 return signed_type_for (tb
);
1199 /* Applies operation OP on affine functions FNA and FNB, and returns the
1203 affine_fn_op (enum tree_code op
, affine_fn fna
, affine_fn fnb
)
1209 if (fnb
.length () > fna
.length ())
1221 for (i
= 0; i
< n
; i
++)
1223 tree type
= signed_type_for_types (TREE_TYPE (fna
[i
]),
1224 TREE_TYPE (fnb
[i
]));
1225 ret
.quick_push (fold_build2 (op
, type
, fna
[i
], fnb
[i
]));
1228 for (; fna
.iterate (i
, &coef
); i
++)
1229 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1230 coef
, integer_zero_node
));
1231 for (; fnb
.iterate (i
, &coef
); i
++)
1232 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1233 integer_zero_node
, coef
));
1238 /* Returns the sum of affine functions FNA and FNB. */
1241 affine_fn_plus (affine_fn fna
, affine_fn fnb
)
1243 return affine_fn_op (PLUS_EXPR
, fna
, fnb
);
1246 /* Returns the difference of affine functions FNA and FNB. */
1249 affine_fn_minus (affine_fn fna
, affine_fn fnb
)
1251 return affine_fn_op (MINUS_EXPR
, fna
, fnb
);
1254 /* Frees affine function FN. */
1257 affine_fn_free (affine_fn fn
)
1262 /* Determine for each subscript in the data dependence relation DDR
1266 compute_subscript_distance (struct data_dependence_relation
*ddr
)
1268 conflict_function
*cf_a
, *cf_b
;
1269 affine_fn fn_a
, fn_b
, diff
;
1271 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1275 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
1277 struct subscript
*subscript
;
1279 subscript
= DDR_SUBSCRIPT (ddr
, i
);
1280 cf_a
= SUB_CONFLICTS_IN_A (subscript
);
1281 cf_b
= SUB_CONFLICTS_IN_B (subscript
);
1283 fn_a
= common_affine_function (cf_a
);
1284 fn_b
= common_affine_function (cf_b
);
1285 if (!fn_a
.exists () || !fn_b
.exists ())
1287 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1290 diff
= affine_fn_minus (fn_a
, fn_b
);
1292 if (affine_function_constant_p (diff
))
1293 SUB_DISTANCE (subscript
) = affine_function_base (diff
);
1295 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1297 affine_fn_free (diff
);
1302 /* Returns the conflict function for "unknown". */
1304 static conflict_function
*
1305 conflict_fn_not_known (void)
1307 conflict_function
*fn
= XCNEW (conflict_function
);
1313 /* Returns the conflict function for "independent". */
1315 static conflict_function
*
1316 conflict_fn_no_dependence (void)
1318 conflict_function
*fn
= XCNEW (conflict_function
);
1319 fn
->n
= NO_DEPENDENCE
;
1324 /* Returns true if the address of OBJ is invariant in LOOP. */
1327 object_address_invariant_in_loop_p (const struct loop
*loop
, const_tree obj
)
1329 while (handled_component_p (obj
))
1331 if (TREE_CODE (obj
) == ARRAY_REF
)
1333 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1334 need to check the stride and the lower bound of the reference. */
1335 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1337 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 3),
1341 else if (TREE_CODE (obj
) == COMPONENT_REF
)
1343 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1347 obj
= TREE_OPERAND (obj
, 0);
1350 if (!INDIRECT_REF_P (obj
)
1351 && TREE_CODE (obj
) != MEM_REF
)
1354 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 0),
1358 /* Returns false if we can prove that data references A and B do not alias,
1359 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1363 dr_may_alias_p (const struct data_reference
*a
, const struct data_reference
*b
,
1366 tree addr_a
= DR_BASE_OBJECT (a
);
1367 tree addr_b
= DR_BASE_OBJECT (b
);
1369 /* If we are not processing a loop nest but scalar code we
1370 do not need to care about possible cross-iteration dependences
1371 and thus can process the full original reference. Do so,
1372 similar to how loop invariant motion applies extra offset-based
1376 aff_tree off1
, off2
;
1377 double_int size1
, size2
;
1378 get_inner_reference_aff (DR_REF (a
), &off1
, &size1
);
1379 get_inner_reference_aff (DR_REF (b
), &off2
, &size2
);
1380 aff_combination_scale (&off1
, double_int_minus_one
);
1381 aff_combination_add (&off2
, &off1
);
1382 if (aff_comb_cannot_overlap_p (&off2
, size1
, size2
))
1386 /* If we had an evolution in a MEM_REF BASE_OBJECT we do not know
1387 the size of the base-object. So we cannot do any offset/overlap
1388 based analysis but have to rely on points-to information only. */
1389 if (TREE_CODE (addr_a
) == MEM_REF
1390 && DR_UNCONSTRAINED_BASE (a
))
1392 if (TREE_CODE (addr_b
) == MEM_REF
1393 && DR_UNCONSTRAINED_BASE (b
))
1394 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
1395 TREE_OPERAND (addr_b
, 0));
1397 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
1398 build_fold_addr_expr (addr_b
));
1400 else if (TREE_CODE (addr_b
) == MEM_REF
1401 && DR_UNCONSTRAINED_BASE (b
))
1402 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a
),
1403 TREE_OPERAND (addr_b
, 0));
1405 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1406 that is being subsetted in the loop nest. */
1407 if (DR_IS_WRITE (a
) && DR_IS_WRITE (b
))
1408 return refs_output_dependent_p (addr_a
, addr_b
);
1409 else if (DR_IS_READ (a
) && DR_IS_WRITE (b
))
1410 return refs_anti_dependent_p (addr_a
, addr_b
);
1411 return refs_may_alias_p (addr_a
, addr_b
);
1414 /* Initialize a data dependence relation between data accesses A and
1415 B. NB_LOOPS is the number of loops surrounding the references: the
1416 size of the classic distance/direction vectors. */
1418 struct data_dependence_relation
*
1419 initialize_data_dependence_relation (struct data_reference
*a
,
1420 struct data_reference
*b
,
1421 vec
<loop_p
> loop_nest
)
1423 struct data_dependence_relation
*res
;
1426 res
= XNEW (struct data_dependence_relation
);
1429 DDR_LOOP_NEST (res
).create (0);
1430 DDR_REVERSED_P (res
) = false;
1431 DDR_SUBSCRIPTS (res
).create (0);
1432 DDR_DIR_VECTS (res
).create (0);
1433 DDR_DIST_VECTS (res
).create (0);
1435 if (a
== NULL
|| b
== NULL
)
1437 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1441 /* If the data references do not alias, then they are independent. */
1442 if (!dr_may_alias_p (a
, b
, loop_nest
.exists ()))
1444 DDR_ARE_DEPENDENT (res
) = chrec_known
;
1448 /* The case where the references are exactly the same. */
1449 if (operand_equal_p (DR_REF (a
), DR_REF (b
), 0))
1451 if (loop_nest
.exists ()
1452 && !object_address_invariant_in_loop_p (loop_nest
[0],
1453 DR_BASE_OBJECT (a
)))
1455 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1458 DDR_AFFINE_P (res
) = true;
1459 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1460 DDR_SUBSCRIPTS (res
).create (DR_NUM_DIMENSIONS (a
));
1461 DDR_LOOP_NEST (res
) = loop_nest
;
1462 DDR_INNER_LOOP (res
) = 0;
1463 DDR_SELF_REFERENCE (res
) = true;
1464 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1466 struct subscript
*subscript
;
1468 subscript
= XNEW (struct subscript
);
1469 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1470 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1471 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1472 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1473 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
1478 /* If the references do not access the same object, we do not know
1479 whether they alias or not. */
1480 if (!operand_equal_p (DR_BASE_OBJECT (a
), DR_BASE_OBJECT (b
), 0))
1482 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1486 /* If the base of the object is not invariant in the loop nest, we cannot
1487 analyze it. TODO -- in fact, it would suffice to record that there may
1488 be arbitrary dependences in the loops where the base object varies. */
1489 if (loop_nest
.exists ()
1490 && !object_address_invariant_in_loop_p (loop_nest
[0],
1491 DR_BASE_OBJECT (a
)))
1493 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1497 /* If the number of dimensions of the access to not agree we can have
1498 a pointer access to a component of the array element type and an
1499 array access while the base-objects are still the same. Punt. */
1500 if (DR_NUM_DIMENSIONS (a
) != DR_NUM_DIMENSIONS (b
))
1502 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1506 DDR_AFFINE_P (res
) = true;
1507 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1508 DDR_SUBSCRIPTS (res
).create (DR_NUM_DIMENSIONS (a
));
1509 DDR_LOOP_NEST (res
) = loop_nest
;
1510 DDR_INNER_LOOP (res
) = 0;
1511 DDR_SELF_REFERENCE (res
) = false;
1513 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1515 struct subscript
*subscript
;
1517 subscript
= XNEW (struct subscript
);
1518 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1519 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1520 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1521 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1522 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
1528 /* Frees memory used by the conflict function F. */
1531 free_conflict_function (conflict_function
*f
)
1535 if (CF_NONTRIVIAL_P (f
))
1537 for (i
= 0; i
< f
->n
; i
++)
1538 affine_fn_free (f
->fns
[i
]);
1543 /* Frees memory used by SUBSCRIPTS. */
1546 free_subscripts (vec
<subscript_p
> subscripts
)
1551 FOR_EACH_VEC_ELT (subscripts
, i
, s
)
1553 free_conflict_function (s
->conflicting_iterations_in_a
);
1554 free_conflict_function (s
->conflicting_iterations_in_b
);
1557 subscripts
.release ();
1560 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1564 finalize_ddr_dependent (struct data_dependence_relation
*ddr
,
1567 DDR_ARE_DEPENDENT (ddr
) = chrec
;
1568 free_subscripts (DDR_SUBSCRIPTS (ddr
));
1569 DDR_SUBSCRIPTS (ddr
).create (0);
1572 /* The dependence relation DDR cannot be represented by a distance
1576 non_affine_dependence_relation (struct data_dependence_relation
*ddr
)
1578 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1579 fprintf (dump_file
, "(Dependence relation cannot be represented by distance vector.) \n");
1581 DDR_AFFINE_P (ddr
) = false;
1586 /* This section contains the classic Banerjee tests. */
1588 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1589 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1592 ziv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1594 return (evolution_function_is_constant_p (chrec_a
)
1595 && evolution_function_is_constant_p (chrec_b
));
1598 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1599 variable, i.e., if the SIV (Single Index Variable) test is true. */
1602 siv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1604 if ((evolution_function_is_constant_p (chrec_a
)
1605 && evolution_function_is_univariate_p (chrec_b
))
1606 || (evolution_function_is_constant_p (chrec_b
)
1607 && evolution_function_is_univariate_p (chrec_a
)))
1610 if (evolution_function_is_univariate_p (chrec_a
)
1611 && evolution_function_is_univariate_p (chrec_b
))
1613 switch (TREE_CODE (chrec_a
))
1615 case POLYNOMIAL_CHREC
:
1616 switch (TREE_CODE (chrec_b
))
1618 case POLYNOMIAL_CHREC
:
1619 if (CHREC_VARIABLE (chrec_a
) != CHREC_VARIABLE (chrec_b
))
1634 /* Creates a conflict function with N dimensions. The affine functions
1635 in each dimension follow. */
1637 static conflict_function
*
1638 conflict_fn (unsigned n
, ...)
1641 conflict_function
*ret
= XCNEW (conflict_function
);
1644 gcc_assert (0 < n
&& n
<= MAX_DIM
);
1648 for (i
= 0; i
< n
; i
++)
1649 ret
->fns
[i
] = va_arg (ap
, affine_fn
);
1655 /* Returns constant affine function with value CST. */
1658 affine_fn_cst (tree cst
)
1662 fn
.quick_push (cst
);
1666 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1669 affine_fn_univar (tree cst
, unsigned dim
, tree coef
)
1672 fn
.create (dim
+ 1);
1675 gcc_assert (dim
> 0);
1676 fn
.quick_push (cst
);
1677 for (i
= 1; i
< dim
; i
++)
1678 fn
.quick_push (integer_zero_node
);
1679 fn
.quick_push (coef
);
1683 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1684 *OVERLAPS_B are initialized to the functions that describe the
1685 relation between the elements accessed twice by CHREC_A and
1686 CHREC_B. For k >= 0, the following property is verified:
1688 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1691 analyze_ziv_subscript (tree chrec_a
,
1693 conflict_function
**overlaps_a
,
1694 conflict_function
**overlaps_b
,
1695 tree
*last_conflicts
)
1697 tree type
, difference
;
1698 dependence_stats
.num_ziv
++;
1700 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1701 fprintf (dump_file
, "(analyze_ziv_subscript \n");
1703 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1704 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1705 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1706 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
1708 switch (TREE_CODE (difference
))
1711 if (integer_zerop (difference
))
1713 /* The difference is equal to zero: the accessed index
1714 overlaps for each iteration in the loop. */
1715 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1716 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1717 *last_conflicts
= chrec_dont_know
;
1718 dependence_stats
.num_ziv_dependent
++;
1722 /* The accesses do not overlap. */
1723 *overlaps_a
= conflict_fn_no_dependence ();
1724 *overlaps_b
= conflict_fn_no_dependence ();
1725 *last_conflicts
= integer_zero_node
;
1726 dependence_stats
.num_ziv_independent
++;
1731 /* We're not sure whether the indexes overlap. For the moment,
1732 conservatively answer "don't know". */
1733 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1734 fprintf (dump_file
, "ziv test failed: difference is non-integer.\n");
1736 *overlaps_a
= conflict_fn_not_known ();
1737 *overlaps_b
= conflict_fn_not_known ();
1738 *last_conflicts
= chrec_dont_know
;
1739 dependence_stats
.num_ziv_unimplemented
++;
1743 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1744 fprintf (dump_file
, ")\n");
1747 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1748 and only if it fits to the int type. If this is not the case, or the
1749 bound on the number of iterations of LOOP could not be derived, returns
1753 max_stmt_executions_tree (struct loop
*loop
)
1757 if (!max_stmt_executions (loop
, &nit
))
1758 return chrec_dont_know
;
1760 if (!double_int_fits_to_tree_p (unsigned_type_node
, nit
))
1761 return chrec_dont_know
;
1763 return double_int_to_tree (unsigned_type_node
, nit
);
1766 /* Determine whether the CHREC is always positive/negative. If the expression
1767 cannot be statically analyzed, return false, otherwise set the answer into
1771 chrec_is_positive (tree chrec
, bool *value
)
1773 bool value0
, value1
, value2
;
1774 tree end_value
, nb_iter
;
1776 switch (TREE_CODE (chrec
))
1778 case POLYNOMIAL_CHREC
:
1779 if (!chrec_is_positive (CHREC_LEFT (chrec
), &value0
)
1780 || !chrec_is_positive (CHREC_RIGHT (chrec
), &value1
))
1783 /* FIXME -- overflows. */
1784 if (value0
== value1
)
1790 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1791 and the proof consists in showing that the sign never
1792 changes during the execution of the loop, from 0 to
1793 loop->nb_iterations. */
1794 if (!evolution_function_is_affine_p (chrec
))
1797 nb_iter
= number_of_latch_executions (get_chrec_loop (chrec
));
1798 if (chrec_contains_undetermined (nb_iter
))
1802 /* TODO -- If the test is after the exit, we may decrease the number of
1803 iterations by one. */
1805 nb_iter
= chrec_fold_minus (type
, nb_iter
, build_int_cst (type
, 1));
1808 end_value
= chrec_apply (CHREC_VARIABLE (chrec
), chrec
, nb_iter
);
1810 if (!chrec_is_positive (end_value
, &value2
))
1814 return value0
== value1
;
1817 switch (tree_int_cst_sgn (chrec
))
1836 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1837 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1838 *OVERLAPS_B are initialized to the functions that describe the
1839 relation between the elements accessed twice by CHREC_A and
1840 CHREC_B. For k >= 0, the following property is verified:
1842 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1845 analyze_siv_subscript_cst_affine (tree chrec_a
,
1847 conflict_function
**overlaps_a
,
1848 conflict_function
**overlaps_b
,
1849 tree
*last_conflicts
)
1851 bool value0
, value1
, value2
;
1852 tree type
, difference
, tmp
;
1854 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1855 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1856 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1857 difference
= chrec_fold_minus (type
, initial_condition (chrec_b
), chrec_a
);
1859 /* Special case overlap in the first iteration. */
1860 if (integer_zerop (difference
))
1862 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1863 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1864 *last_conflicts
= integer_one_node
;
1868 if (!chrec_is_positive (initial_condition (difference
), &value0
))
1870 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1871 fprintf (dump_file
, "siv test failed: chrec is not positive.\n");
1873 dependence_stats
.num_siv_unimplemented
++;
1874 *overlaps_a
= conflict_fn_not_known ();
1875 *overlaps_b
= conflict_fn_not_known ();
1876 *last_conflicts
= chrec_dont_know
;
1881 if (value0
== false)
1883 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value1
))
1885 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1886 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1888 *overlaps_a
= conflict_fn_not_known ();
1889 *overlaps_b
= conflict_fn_not_known ();
1890 *last_conflicts
= chrec_dont_know
;
1891 dependence_stats
.num_siv_unimplemented
++;
1900 chrec_b = {10, +, 1}
1903 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1905 HOST_WIDE_INT numiter
;
1906 struct loop
*loop
= get_chrec_loop (chrec_b
);
1908 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1909 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
,
1910 fold_build1 (ABS_EXPR
, type
, difference
),
1911 CHREC_RIGHT (chrec_b
));
1912 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1913 *last_conflicts
= integer_one_node
;
1916 /* Perform weak-zero siv test to see if overlap is
1917 outside the loop bounds. */
1918 numiter
= max_stmt_executions_int (loop
);
1921 && compare_tree_int (tmp
, numiter
) > 0)
1923 free_conflict_function (*overlaps_a
);
1924 free_conflict_function (*overlaps_b
);
1925 *overlaps_a
= conflict_fn_no_dependence ();
1926 *overlaps_b
= conflict_fn_no_dependence ();
1927 *last_conflicts
= integer_zero_node
;
1928 dependence_stats
.num_siv_independent
++;
1931 dependence_stats
.num_siv_dependent
++;
1935 /* When the step does not divide the difference, there are
1939 *overlaps_a
= conflict_fn_no_dependence ();
1940 *overlaps_b
= conflict_fn_no_dependence ();
1941 *last_conflicts
= integer_zero_node
;
1942 dependence_stats
.num_siv_independent
++;
1951 chrec_b = {10, +, -1}
1953 In this case, chrec_a will not overlap with chrec_b. */
1954 *overlaps_a
= conflict_fn_no_dependence ();
1955 *overlaps_b
= conflict_fn_no_dependence ();
1956 *last_conflicts
= integer_zero_node
;
1957 dependence_stats
.num_siv_independent
++;
1964 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value2
))
1966 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1967 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1969 *overlaps_a
= conflict_fn_not_known ();
1970 *overlaps_b
= conflict_fn_not_known ();
1971 *last_conflicts
= chrec_dont_know
;
1972 dependence_stats
.num_siv_unimplemented
++;
1977 if (value2
== false)
1981 chrec_b = {10, +, -1}
1983 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1985 HOST_WIDE_INT numiter
;
1986 struct loop
*loop
= get_chrec_loop (chrec_b
);
1988 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1989 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
, difference
,
1990 CHREC_RIGHT (chrec_b
));
1991 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1992 *last_conflicts
= integer_one_node
;
1994 /* Perform weak-zero siv test to see if overlap is
1995 outside the loop bounds. */
1996 numiter
= max_stmt_executions_int (loop
);
1999 && compare_tree_int (tmp
, numiter
) > 0)
2001 free_conflict_function (*overlaps_a
);
2002 free_conflict_function (*overlaps_b
);
2003 *overlaps_a
= conflict_fn_no_dependence ();
2004 *overlaps_b
= conflict_fn_no_dependence ();
2005 *last_conflicts
= integer_zero_node
;
2006 dependence_stats
.num_siv_independent
++;
2009 dependence_stats
.num_siv_dependent
++;
2013 /* When the step does not divide the difference, there
2017 *overlaps_a
= conflict_fn_no_dependence ();
2018 *overlaps_b
= conflict_fn_no_dependence ();
2019 *last_conflicts
= integer_zero_node
;
2020 dependence_stats
.num_siv_independent
++;
2030 In this case, chrec_a will not overlap with chrec_b. */
2031 *overlaps_a
= conflict_fn_no_dependence ();
2032 *overlaps_b
= conflict_fn_no_dependence ();
2033 *last_conflicts
= integer_zero_node
;
2034 dependence_stats
.num_siv_independent
++;
2042 /* Helper recursive function for initializing the matrix A. Returns
2043 the initial value of CHREC. */
2046 initialize_matrix_A (lambda_matrix A
, tree chrec
, unsigned index
, int mult
)
2050 switch (TREE_CODE (chrec
))
2052 case POLYNOMIAL_CHREC
:
2053 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec
)) == INTEGER_CST
);
2055 A
[index
][0] = mult
* int_cst_value (CHREC_RIGHT (chrec
));
2056 return initialize_matrix_A (A
, CHREC_LEFT (chrec
), index
+ 1, mult
);
2062 tree op0
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2063 tree op1
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 1), index
, mult
);
2065 return chrec_fold_op (TREE_CODE (chrec
), chrec_type (chrec
), op0
, op1
);
2070 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2071 return chrec_convert (chrec_type (chrec
), op
, NULL
);
2076 /* Handle ~X as -1 - X. */
2077 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2078 return chrec_fold_op (MINUS_EXPR
, chrec_type (chrec
),
2079 build_int_cst (TREE_TYPE (chrec
), -1), op
);
2091 #define FLOOR_DIV(x,y) ((x) / (y))
2093 /* Solves the special case of the Diophantine equation:
2094 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2096 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2097 number of iterations that loops X and Y run. The overlaps will be
2098 constructed as evolutions in dimension DIM. */
2101 compute_overlap_steps_for_affine_univar (int niter
, int step_a
, int step_b
,
2102 affine_fn
*overlaps_a
,
2103 affine_fn
*overlaps_b
,
2104 tree
*last_conflicts
, int dim
)
2106 if (((step_a
> 0 && step_b
> 0)
2107 || (step_a
< 0 && step_b
< 0)))
2109 int step_overlaps_a
, step_overlaps_b
;
2110 int gcd_steps_a_b
, last_conflict
, tau2
;
2112 gcd_steps_a_b
= gcd (step_a
, step_b
);
2113 step_overlaps_a
= step_b
/ gcd_steps_a_b
;
2114 step_overlaps_b
= step_a
/ gcd_steps_a_b
;
2118 tau2
= FLOOR_DIV (niter
, step_overlaps_a
);
2119 tau2
= MIN (tau2
, FLOOR_DIV (niter
, step_overlaps_b
));
2120 last_conflict
= tau2
;
2121 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2124 *last_conflicts
= chrec_dont_know
;
2126 *overlaps_a
= affine_fn_univar (integer_zero_node
, dim
,
2127 build_int_cst (NULL_TREE
,
2129 *overlaps_b
= affine_fn_univar (integer_zero_node
, dim
,
2130 build_int_cst (NULL_TREE
,
2136 *overlaps_a
= affine_fn_cst (integer_zero_node
);
2137 *overlaps_b
= affine_fn_cst (integer_zero_node
);
2138 *last_conflicts
= integer_zero_node
;
2142 /* Solves the special case of a Diophantine equation where CHREC_A is
2143 an affine bivariate function, and CHREC_B is an affine univariate
2144 function. For example,
2146 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2148 has the following overlapping functions:
2150 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2151 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2152 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2154 FORNOW: This is a specialized implementation for a case occurring in
2155 a common benchmark. Implement the general algorithm. */
2158 compute_overlap_steps_for_affine_1_2 (tree chrec_a
, tree chrec_b
,
2159 conflict_function
**overlaps_a
,
2160 conflict_function
**overlaps_b
,
2161 tree
*last_conflicts
)
2163 bool xz_p
, yz_p
, xyz_p
;
2164 int step_x
, step_y
, step_z
;
2165 HOST_WIDE_INT niter_x
, niter_y
, niter_z
, niter
;
2166 affine_fn overlaps_a_xz
, overlaps_b_xz
;
2167 affine_fn overlaps_a_yz
, overlaps_b_yz
;
2168 affine_fn overlaps_a_xyz
, overlaps_b_xyz
;
2169 affine_fn ova1
, ova2
, ovb
;
2170 tree last_conflicts_xz
, last_conflicts_yz
, last_conflicts_xyz
;
2172 step_x
= int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a
)));
2173 step_y
= int_cst_value (CHREC_RIGHT (chrec_a
));
2174 step_z
= int_cst_value (CHREC_RIGHT (chrec_b
));
2176 niter_x
= max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a
)));
2177 niter_y
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
2178 niter_z
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
2180 if (niter_x
< 0 || niter_y
< 0 || niter_z
< 0)
2182 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2183 fprintf (dump_file
, "overlap steps test failed: no iteration counts.\n");
2185 *overlaps_a
= conflict_fn_not_known ();
2186 *overlaps_b
= conflict_fn_not_known ();
2187 *last_conflicts
= chrec_dont_know
;
2191 niter
= MIN (niter_x
, niter_z
);
2192 compute_overlap_steps_for_affine_univar (niter
, step_x
, step_z
,
2195 &last_conflicts_xz
, 1);
2196 niter
= MIN (niter_y
, niter_z
);
2197 compute_overlap_steps_for_affine_univar (niter
, step_y
, step_z
,
2200 &last_conflicts_yz
, 2);
2201 niter
= MIN (niter_x
, niter_z
);
2202 niter
= MIN (niter_y
, niter
);
2203 compute_overlap_steps_for_affine_univar (niter
, step_x
+ step_y
, step_z
,
2206 &last_conflicts_xyz
, 3);
2208 xz_p
= !integer_zerop (last_conflicts_xz
);
2209 yz_p
= !integer_zerop (last_conflicts_yz
);
2210 xyz_p
= !integer_zerop (last_conflicts_xyz
);
2212 if (xz_p
|| yz_p
|| xyz_p
)
2214 ova1
= affine_fn_cst (integer_zero_node
);
2215 ova2
= affine_fn_cst (integer_zero_node
);
2216 ovb
= affine_fn_cst (integer_zero_node
);
2219 affine_fn t0
= ova1
;
2222 ova1
= affine_fn_plus (ova1
, overlaps_a_xz
);
2223 ovb
= affine_fn_plus (ovb
, overlaps_b_xz
);
2224 affine_fn_free (t0
);
2225 affine_fn_free (t2
);
2226 *last_conflicts
= last_conflicts_xz
;
2230 affine_fn t0
= ova2
;
2233 ova2
= affine_fn_plus (ova2
, overlaps_a_yz
);
2234 ovb
= affine_fn_plus (ovb
, overlaps_b_yz
);
2235 affine_fn_free (t0
);
2236 affine_fn_free (t2
);
2237 *last_conflicts
= last_conflicts_yz
;
2241 affine_fn t0
= ova1
;
2242 affine_fn t2
= ova2
;
2245 ova1
= affine_fn_plus (ova1
, overlaps_a_xyz
);
2246 ova2
= affine_fn_plus (ova2
, overlaps_a_xyz
);
2247 ovb
= affine_fn_plus (ovb
, overlaps_b_xyz
);
2248 affine_fn_free (t0
);
2249 affine_fn_free (t2
);
2250 affine_fn_free (t4
);
2251 *last_conflicts
= last_conflicts_xyz
;
2253 *overlaps_a
= conflict_fn (2, ova1
, ova2
);
2254 *overlaps_b
= conflict_fn (1, ovb
);
2258 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2259 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2260 *last_conflicts
= integer_zero_node
;
2263 affine_fn_free (overlaps_a_xz
);
2264 affine_fn_free (overlaps_b_xz
);
2265 affine_fn_free (overlaps_a_yz
);
2266 affine_fn_free (overlaps_b_yz
);
2267 affine_fn_free (overlaps_a_xyz
);
2268 affine_fn_free (overlaps_b_xyz
);
2271 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2274 lambda_vector_copy (lambda_vector vec1
, lambda_vector vec2
,
2277 memcpy (vec2
, vec1
, size
* sizeof (*vec1
));
2280 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2283 lambda_matrix_copy (lambda_matrix mat1
, lambda_matrix mat2
,
2288 for (i
= 0; i
< m
; i
++)
2289 lambda_vector_copy (mat1
[i
], mat2
[i
], n
);
2292 /* Store the N x N identity matrix in MAT. */
2295 lambda_matrix_id (lambda_matrix mat
, int size
)
2299 for (i
= 0; i
< size
; i
++)
2300 for (j
= 0; j
< size
; j
++)
2301 mat
[i
][j
] = (i
== j
) ? 1 : 0;
2304 /* Return the first nonzero element of vector VEC1 between START and N.
2305 We must have START <= N. Returns N if VEC1 is the zero vector. */
2308 lambda_vector_first_nz (lambda_vector vec1
, int n
, int start
)
2311 while (j
< n
&& vec1
[j
] == 0)
2316 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2317 R2 = R2 + CONST1 * R1. */
2320 lambda_matrix_row_add (lambda_matrix mat
, int n
, int r1
, int r2
, int const1
)
2327 for (i
= 0; i
< n
; i
++)
2328 mat
[r2
][i
] += const1
* mat
[r1
][i
];
2331 /* Swap rows R1 and R2 in matrix MAT. */
2334 lambda_matrix_row_exchange (lambda_matrix mat
, int r1
, int r2
)
2343 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2344 and store the result in VEC2. */
2347 lambda_vector_mult_const (lambda_vector vec1
, lambda_vector vec2
,
2348 int size
, int const1
)
2353 lambda_vector_clear (vec2
, size
);
2355 for (i
= 0; i
< size
; i
++)
2356 vec2
[i
] = const1
* vec1
[i
];
2359 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2362 lambda_vector_negate (lambda_vector vec1
, lambda_vector vec2
,
2365 lambda_vector_mult_const (vec1
, vec2
, size
, -1);
2368 /* Negate row R1 of matrix MAT which has N columns. */
2371 lambda_matrix_row_negate (lambda_matrix mat
, int n
, int r1
)
2373 lambda_vector_negate (mat
[r1
], mat
[r1
], n
);
2376 /* Return true if two vectors are equal. */
2379 lambda_vector_equal (lambda_vector vec1
, lambda_vector vec2
, int size
)
2382 for (i
= 0; i
< size
; i
++)
2383 if (vec1
[i
] != vec2
[i
])
2388 /* Given an M x N integer matrix A, this function determines an M x
2389 M unimodular matrix U, and an M x N echelon matrix S such that
2390 "U.A = S". This decomposition is also known as "right Hermite".
2392 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2393 Restructuring Compilers" Utpal Banerjee. */
2396 lambda_matrix_right_hermite (lambda_matrix A
, int m
, int n
,
2397 lambda_matrix S
, lambda_matrix U
)
2401 lambda_matrix_copy (A
, S
, m
, n
);
2402 lambda_matrix_id (U
, m
);
2404 for (j
= 0; j
< n
; j
++)
2406 if (lambda_vector_first_nz (S
[j
], m
, i0
) < m
)
2409 for (i
= m
- 1; i
>= i0
; i
--)
2411 while (S
[i
][j
] != 0)
2413 int sigma
, factor
, a
, b
;
2417 sigma
= (a
* b
< 0) ? -1: 1;
2420 factor
= sigma
* (a
/ b
);
2422 lambda_matrix_row_add (S
, n
, i
, i
-1, -factor
);
2423 lambda_matrix_row_exchange (S
, i
, i
-1);
2425 lambda_matrix_row_add (U
, m
, i
, i
-1, -factor
);
2426 lambda_matrix_row_exchange (U
, i
, i
-1);
2433 /* Determines the overlapping elements due to accesses CHREC_A and
2434 CHREC_B, that are affine functions. This function cannot handle
2435 symbolic evolution functions, ie. when initial conditions are
2436 parameters, because it uses lambda matrices of integers. */
2439 analyze_subscript_affine_affine (tree chrec_a
,
2441 conflict_function
**overlaps_a
,
2442 conflict_function
**overlaps_b
,
2443 tree
*last_conflicts
)
2445 unsigned nb_vars_a
, nb_vars_b
, dim
;
2446 HOST_WIDE_INT init_a
, init_b
, gamma
, gcd_alpha_beta
;
2447 lambda_matrix A
, U
, S
;
2448 struct obstack scratch_obstack
;
2450 if (eq_evolutions_p (chrec_a
, chrec_b
))
2452 /* The accessed index overlaps for each iteration in the
2454 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2455 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2456 *last_conflicts
= chrec_dont_know
;
2459 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2460 fprintf (dump_file
, "(analyze_subscript_affine_affine \n");
2462 /* For determining the initial intersection, we have to solve a
2463 Diophantine equation. This is the most time consuming part.
2465 For answering to the question: "Is there a dependence?" we have
2466 to prove that there exists a solution to the Diophantine
2467 equation, and that the solution is in the iteration domain,
2468 i.e. the solution is positive or zero, and that the solution
2469 happens before the upper bound loop.nb_iterations. Otherwise
2470 there is no dependence. This function outputs a description of
2471 the iterations that hold the intersections. */
2473 nb_vars_a
= nb_vars_in_chrec (chrec_a
);
2474 nb_vars_b
= nb_vars_in_chrec (chrec_b
);
2476 gcc_obstack_init (&scratch_obstack
);
2478 dim
= nb_vars_a
+ nb_vars_b
;
2479 U
= lambda_matrix_new (dim
, dim
, &scratch_obstack
);
2480 A
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2481 S
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2483 init_a
= int_cst_value (initialize_matrix_A (A
, chrec_a
, 0, 1));
2484 init_b
= int_cst_value (initialize_matrix_A (A
, chrec_b
, nb_vars_a
, -1));
2485 gamma
= init_b
- init_a
;
2487 /* Don't do all the hard work of solving the Diophantine equation
2488 when we already know the solution: for example,
2491 | gamma = 3 - 3 = 0.
2492 Then the first overlap occurs during the first iterations:
2493 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2497 if (nb_vars_a
== 1 && nb_vars_b
== 1)
2499 HOST_WIDE_INT step_a
, step_b
;
2500 HOST_WIDE_INT niter
, niter_a
, niter_b
;
2503 niter_a
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
2504 niter_b
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
2505 niter
= MIN (niter_a
, niter_b
);
2506 step_a
= int_cst_value (CHREC_RIGHT (chrec_a
));
2507 step_b
= int_cst_value (CHREC_RIGHT (chrec_b
));
2509 compute_overlap_steps_for_affine_univar (niter
, step_a
, step_b
,
2512 *overlaps_a
= conflict_fn (1, ova
);
2513 *overlaps_b
= conflict_fn (1, ovb
);
2516 else if (nb_vars_a
== 2 && nb_vars_b
== 1)
2517 compute_overlap_steps_for_affine_1_2
2518 (chrec_a
, chrec_b
, overlaps_a
, overlaps_b
, last_conflicts
);
2520 else if (nb_vars_a
== 1 && nb_vars_b
== 2)
2521 compute_overlap_steps_for_affine_1_2
2522 (chrec_b
, chrec_a
, overlaps_b
, overlaps_a
, last_conflicts
);
2526 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2527 fprintf (dump_file
, "affine-affine test failed: too many variables.\n");
2528 *overlaps_a
= conflict_fn_not_known ();
2529 *overlaps_b
= conflict_fn_not_known ();
2530 *last_conflicts
= chrec_dont_know
;
2532 goto end_analyze_subs_aa
;
2536 lambda_matrix_right_hermite (A
, dim
, 1, S
, U
);
2541 lambda_matrix_row_negate (U
, dim
, 0);
2543 gcd_alpha_beta
= S
[0][0];
2545 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2546 but that is a quite strange case. Instead of ICEing, answer
2548 if (gcd_alpha_beta
== 0)
2550 *overlaps_a
= conflict_fn_not_known ();
2551 *overlaps_b
= conflict_fn_not_known ();
2552 *last_conflicts
= chrec_dont_know
;
2553 goto end_analyze_subs_aa
;
2556 /* The classic "gcd-test". */
2557 if (!int_divides_p (gcd_alpha_beta
, gamma
))
2559 /* The "gcd-test" has determined that there is no integer
2560 solution, i.e. there is no dependence. */
2561 *overlaps_a
= conflict_fn_no_dependence ();
2562 *overlaps_b
= conflict_fn_no_dependence ();
2563 *last_conflicts
= integer_zero_node
;
2566 /* Both access functions are univariate. This includes SIV and MIV cases. */
2567 else if (nb_vars_a
== 1 && nb_vars_b
== 1)
2569 /* Both functions should have the same evolution sign. */
2570 if (((A
[0][0] > 0 && -A
[1][0] > 0)
2571 || (A
[0][0] < 0 && -A
[1][0] < 0)))
2573 /* The solutions are given by:
2575 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2578 For a given integer t. Using the following variables,
2580 | i0 = u11 * gamma / gcd_alpha_beta
2581 | j0 = u12 * gamma / gcd_alpha_beta
2588 | y0 = j0 + j1 * t. */
2589 HOST_WIDE_INT i0
, j0
, i1
, j1
;
2591 i0
= U
[0][0] * gamma
/ gcd_alpha_beta
;
2592 j0
= U
[0][1] * gamma
/ gcd_alpha_beta
;
2596 if ((i1
== 0 && i0
< 0)
2597 || (j1
== 0 && j0
< 0))
2599 /* There is no solution.
2600 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2601 falls in here, but for the moment we don't look at the
2602 upper bound of the iteration domain. */
2603 *overlaps_a
= conflict_fn_no_dependence ();
2604 *overlaps_b
= conflict_fn_no_dependence ();
2605 *last_conflicts
= integer_zero_node
;
2606 goto end_analyze_subs_aa
;
2609 if (i1
> 0 && j1
> 0)
2611 HOST_WIDE_INT niter_a
2612 = max_stmt_executions_int (get_chrec_loop (chrec_a
));
2613 HOST_WIDE_INT niter_b
2614 = max_stmt_executions_int (get_chrec_loop (chrec_b
));
2615 HOST_WIDE_INT niter
= MIN (niter_a
, niter_b
);
2617 /* (X0, Y0) is a solution of the Diophantine equation:
2618 "chrec_a (X0) = chrec_b (Y0)". */
2619 HOST_WIDE_INT tau1
= MAX (CEIL (-i0
, i1
),
2621 HOST_WIDE_INT x0
= i1
* tau1
+ i0
;
2622 HOST_WIDE_INT y0
= j1
* tau1
+ j0
;
2624 /* (X1, Y1) is the smallest positive solution of the eq
2625 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2626 first conflict occurs. */
2627 HOST_WIDE_INT min_multiple
= MIN (x0
/ i1
, y0
/ j1
);
2628 HOST_WIDE_INT x1
= x0
- i1
* min_multiple
;
2629 HOST_WIDE_INT y1
= y0
- j1
* min_multiple
;
2633 HOST_WIDE_INT tau2
= MIN (FLOOR_DIV (niter
- i0
, i1
),
2634 FLOOR_DIV (niter
- j0
, j1
));
2635 HOST_WIDE_INT last_conflict
= tau2
- (x1
- i0
)/i1
;
2637 /* If the overlap occurs outside of the bounds of the
2638 loop, there is no dependence. */
2639 if (x1
>= niter
|| y1
>= niter
)
2641 *overlaps_a
= conflict_fn_no_dependence ();
2642 *overlaps_b
= conflict_fn_no_dependence ();
2643 *last_conflicts
= integer_zero_node
;
2644 goto end_analyze_subs_aa
;
2647 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2650 *last_conflicts
= chrec_dont_know
;
2654 affine_fn_univar (build_int_cst (NULL_TREE
, x1
),
2656 build_int_cst (NULL_TREE
, i1
)));
2659 affine_fn_univar (build_int_cst (NULL_TREE
, y1
),
2661 build_int_cst (NULL_TREE
, j1
)));
2665 /* FIXME: For the moment, the upper bound of the
2666 iteration domain for i and j is not checked. */
2667 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2668 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2669 *overlaps_a
= conflict_fn_not_known ();
2670 *overlaps_b
= conflict_fn_not_known ();
2671 *last_conflicts
= chrec_dont_know
;
2676 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2677 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2678 *overlaps_a
= conflict_fn_not_known ();
2679 *overlaps_b
= conflict_fn_not_known ();
2680 *last_conflicts
= chrec_dont_know
;
2685 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2686 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2687 *overlaps_a
= conflict_fn_not_known ();
2688 *overlaps_b
= conflict_fn_not_known ();
2689 *last_conflicts
= chrec_dont_know
;
2692 end_analyze_subs_aa
:
2693 obstack_free (&scratch_obstack
, NULL
);
2694 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2696 fprintf (dump_file
, " (overlaps_a = ");
2697 dump_conflict_function (dump_file
, *overlaps_a
);
2698 fprintf (dump_file
, ")\n (overlaps_b = ");
2699 dump_conflict_function (dump_file
, *overlaps_b
);
2700 fprintf (dump_file
, "))\n");
2704 /* Returns true when analyze_subscript_affine_affine can be used for
2705 determining the dependence relation between chrec_a and chrec_b,
2706 that contain symbols. This function modifies chrec_a and chrec_b
2707 such that the analysis result is the same, and such that they don't
2708 contain symbols, and then can safely be passed to the analyzer.
2710 Example: The analysis of the following tuples of evolutions produce
2711 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2714 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2715 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2719 can_use_analyze_subscript_affine_affine (tree
*chrec_a
, tree
*chrec_b
)
2721 tree diff
, type
, left_a
, left_b
, right_b
;
2723 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a
))
2724 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b
)))
2725 /* FIXME: For the moment not handled. Might be refined later. */
2728 type
= chrec_type (*chrec_a
);
2729 left_a
= CHREC_LEFT (*chrec_a
);
2730 left_b
= chrec_convert (type
, CHREC_LEFT (*chrec_b
), NULL
);
2731 diff
= chrec_fold_minus (type
, left_a
, left_b
);
2733 if (!evolution_function_is_constant_p (diff
))
2736 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2737 fprintf (dump_file
, "can_use_subscript_aff_aff_for_symbolic \n");
2739 *chrec_a
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_a
),
2740 diff
, CHREC_RIGHT (*chrec_a
));
2741 right_b
= chrec_convert (type
, CHREC_RIGHT (*chrec_b
), NULL
);
2742 *chrec_b
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_b
),
2743 build_int_cst (type
, 0),
2748 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2749 *OVERLAPS_B are initialized to the functions that describe the
2750 relation between the elements accessed twice by CHREC_A and
2751 CHREC_B. For k >= 0, the following property is verified:
2753 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2756 analyze_siv_subscript (tree chrec_a
,
2758 conflict_function
**overlaps_a
,
2759 conflict_function
**overlaps_b
,
2760 tree
*last_conflicts
,
2763 dependence_stats
.num_siv
++;
2765 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2766 fprintf (dump_file
, "(analyze_siv_subscript \n");
2768 if (evolution_function_is_constant_p (chrec_a
)
2769 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2770 analyze_siv_subscript_cst_affine (chrec_a
, chrec_b
,
2771 overlaps_a
, overlaps_b
, last_conflicts
);
2773 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2774 && evolution_function_is_constant_p (chrec_b
))
2775 analyze_siv_subscript_cst_affine (chrec_b
, chrec_a
,
2776 overlaps_b
, overlaps_a
, last_conflicts
);
2778 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2779 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2781 if (!chrec_contains_symbols (chrec_a
)
2782 && !chrec_contains_symbols (chrec_b
))
2784 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2785 overlaps_a
, overlaps_b
,
2788 if (CF_NOT_KNOWN_P (*overlaps_a
)
2789 || CF_NOT_KNOWN_P (*overlaps_b
))
2790 dependence_stats
.num_siv_unimplemented
++;
2791 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2792 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2793 dependence_stats
.num_siv_independent
++;
2795 dependence_stats
.num_siv_dependent
++;
2797 else if (can_use_analyze_subscript_affine_affine (&chrec_a
,
2800 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2801 overlaps_a
, overlaps_b
,
2804 if (CF_NOT_KNOWN_P (*overlaps_a
)
2805 || CF_NOT_KNOWN_P (*overlaps_b
))
2806 dependence_stats
.num_siv_unimplemented
++;
2807 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2808 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2809 dependence_stats
.num_siv_independent
++;
2811 dependence_stats
.num_siv_dependent
++;
2814 goto siv_subscript_dontknow
;
2819 siv_subscript_dontknow
:;
2820 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2821 fprintf (dump_file
, " siv test failed: unimplemented");
2822 *overlaps_a
= conflict_fn_not_known ();
2823 *overlaps_b
= conflict_fn_not_known ();
2824 *last_conflicts
= chrec_dont_know
;
2825 dependence_stats
.num_siv_unimplemented
++;
2828 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2829 fprintf (dump_file
, ")\n");
2832 /* Returns false if we can prove that the greatest common divisor of the steps
2833 of CHREC does not divide CST, false otherwise. */
2836 gcd_of_steps_may_divide_p (const_tree chrec
, const_tree cst
)
2838 HOST_WIDE_INT cd
= 0, val
;
2841 if (!tree_fits_shwi_p (cst
))
2843 val
= tree_to_shwi (cst
);
2845 while (TREE_CODE (chrec
) == POLYNOMIAL_CHREC
)
2847 step
= CHREC_RIGHT (chrec
);
2848 if (!tree_fits_shwi_p (step
))
2850 cd
= gcd (cd
, tree_to_shwi (step
));
2851 chrec
= CHREC_LEFT (chrec
);
2854 return val
% cd
== 0;
2857 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2858 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2859 functions that describe the relation between the elements accessed
2860 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2863 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2866 analyze_miv_subscript (tree chrec_a
,
2868 conflict_function
**overlaps_a
,
2869 conflict_function
**overlaps_b
,
2870 tree
*last_conflicts
,
2871 struct loop
*loop_nest
)
2873 tree type
, difference
;
2875 dependence_stats
.num_miv
++;
2876 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2877 fprintf (dump_file
, "(analyze_miv_subscript \n");
2879 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
2880 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
2881 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
2882 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
2884 if (eq_evolutions_p (chrec_a
, chrec_b
))
2886 /* Access functions are the same: all the elements are accessed
2887 in the same order. */
2888 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2889 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2890 *last_conflicts
= max_stmt_executions_tree (get_chrec_loop (chrec_a
));
2891 dependence_stats
.num_miv_dependent
++;
2894 else if (evolution_function_is_constant_p (difference
)
2895 /* For the moment, the following is verified:
2896 evolution_function_is_affine_multivariate_p (chrec_a,
2898 && !gcd_of_steps_may_divide_p (chrec_a
, difference
))
2900 /* testsuite/.../ssa-chrec-33.c
2901 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2903 The difference is 1, and all the evolution steps are multiples
2904 of 2, consequently there are no overlapping elements. */
2905 *overlaps_a
= conflict_fn_no_dependence ();
2906 *overlaps_b
= conflict_fn_no_dependence ();
2907 *last_conflicts
= integer_zero_node
;
2908 dependence_stats
.num_miv_independent
++;
2911 else if (evolution_function_is_affine_multivariate_p (chrec_a
, loop_nest
->num
)
2912 && !chrec_contains_symbols (chrec_a
)
2913 && evolution_function_is_affine_multivariate_p (chrec_b
, loop_nest
->num
)
2914 && !chrec_contains_symbols (chrec_b
))
2916 /* testsuite/.../ssa-chrec-35.c
2917 {0, +, 1}_2 vs. {0, +, 1}_3
2918 the overlapping elements are respectively located at iterations:
2919 {0, +, 1}_x and {0, +, 1}_x,
2920 in other words, we have the equality:
2921 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2924 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2925 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2927 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2928 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2930 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2931 overlaps_a
, overlaps_b
, last_conflicts
);
2933 if (CF_NOT_KNOWN_P (*overlaps_a
)
2934 || CF_NOT_KNOWN_P (*overlaps_b
))
2935 dependence_stats
.num_miv_unimplemented
++;
2936 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2937 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2938 dependence_stats
.num_miv_independent
++;
2940 dependence_stats
.num_miv_dependent
++;
2945 /* When the analysis is too difficult, answer "don't know". */
2946 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2947 fprintf (dump_file
, "analyze_miv_subscript test failed: unimplemented.\n");
2949 *overlaps_a
= conflict_fn_not_known ();
2950 *overlaps_b
= conflict_fn_not_known ();
2951 *last_conflicts
= chrec_dont_know
;
2952 dependence_stats
.num_miv_unimplemented
++;
2955 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2956 fprintf (dump_file
, ")\n");
2959 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2960 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2961 OVERLAP_ITERATIONS_B are initialized with two functions that
2962 describe the iterations that contain conflicting elements.
2964 Remark: For an integer k >= 0, the following equality is true:
2966 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2970 analyze_overlapping_iterations (tree chrec_a
,
2972 conflict_function
**overlap_iterations_a
,
2973 conflict_function
**overlap_iterations_b
,
2974 tree
*last_conflicts
, struct loop
*loop_nest
)
2976 unsigned int lnn
= loop_nest
->num
;
2978 dependence_stats
.num_subscript_tests
++;
2980 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2982 fprintf (dump_file
, "(analyze_overlapping_iterations \n");
2983 fprintf (dump_file
, " (chrec_a = ");
2984 print_generic_expr (dump_file
, chrec_a
, 0);
2985 fprintf (dump_file
, ")\n (chrec_b = ");
2986 print_generic_expr (dump_file
, chrec_b
, 0);
2987 fprintf (dump_file
, ")\n");
2990 if (chrec_a
== NULL_TREE
2991 || chrec_b
== NULL_TREE
2992 || chrec_contains_undetermined (chrec_a
)
2993 || chrec_contains_undetermined (chrec_b
))
2995 dependence_stats
.num_subscript_undetermined
++;
2997 *overlap_iterations_a
= conflict_fn_not_known ();
2998 *overlap_iterations_b
= conflict_fn_not_known ();
3001 /* If they are the same chrec, and are affine, they overlap
3002 on every iteration. */
3003 else if (eq_evolutions_p (chrec_a
, chrec_b
)
3004 && (evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
3005 || operand_equal_p (chrec_a
, chrec_b
, 0)))
3007 dependence_stats
.num_same_subscript_function
++;
3008 *overlap_iterations_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3009 *overlap_iterations_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3010 *last_conflicts
= chrec_dont_know
;
3013 /* If they aren't the same, and aren't affine, we can't do anything
3015 else if ((chrec_contains_symbols (chrec_a
)
3016 || chrec_contains_symbols (chrec_b
))
3017 && (!evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
3018 || !evolution_function_is_affine_multivariate_p (chrec_b
, lnn
)))
3020 dependence_stats
.num_subscript_undetermined
++;
3021 *overlap_iterations_a
= conflict_fn_not_known ();
3022 *overlap_iterations_b
= conflict_fn_not_known ();
3025 else if (ziv_subscript_p (chrec_a
, chrec_b
))
3026 analyze_ziv_subscript (chrec_a
, chrec_b
,
3027 overlap_iterations_a
, overlap_iterations_b
,
3030 else if (siv_subscript_p (chrec_a
, chrec_b
))
3031 analyze_siv_subscript (chrec_a
, chrec_b
,
3032 overlap_iterations_a
, overlap_iterations_b
,
3033 last_conflicts
, lnn
);
3036 analyze_miv_subscript (chrec_a
, chrec_b
,
3037 overlap_iterations_a
, overlap_iterations_b
,
3038 last_conflicts
, loop_nest
);
3040 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3042 fprintf (dump_file
, " (overlap_iterations_a = ");
3043 dump_conflict_function (dump_file
, *overlap_iterations_a
);
3044 fprintf (dump_file
, ")\n (overlap_iterations_b = ");
3045 dump_conflict_function (dump_file
, *overlap_iterations_b
);
3046 fprintf (dump_file
, "))\n");
3050 /* Helper function for uniquely inserting distance vectors. */
3053 save_dist_v (struct data_dependence_relation
*ddr
, lambda_vector dist_v
)
3058 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, v
)
3059 if (lambda_vector_equal (v
, dist_v
, DDR_NB_LOOPS (ddr
)))
3062 DDR_DIST_VECTS (ddr
).safe_push (dist_v
);
3065 /* Helper function for uniquely inserting direction vectors. */
3068 save_dir_v (struct data_dependence_relation
*ddr
, lambda_vector dir_v
)
3073 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), i
, v
)
3074 if (lambda_vector_equal (v
, dir_v
, DDR_NB_LOOPS (ddr
)))
3077 DDR_DIR_VECTS (ddr
).safe_push (dir_v
);
3080 /* Add a distance of 1 on all the loops outer than INDEX. If we
3081 haven't yet determined a distance for this outer loop, push a new
3082 distance vector composed of the previous distance, and a distance
3083 of 1 for this outer loop. Example:
3091 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3092 save (0, 1), then we have to save (1, 0). */
3095 add_outer_distances (struct data_dependence_relation
*ddr
,
3096 lambda_vector dist_v
, int index
)
3098 /* For each outer loop where init_v is not set, the accesses are
3099 in dependence of distance 1 in the loop. */
3100 while (--index
>= 0)
3102 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3103 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3105 save_dist_v (ddr
, save_v
);
3109 /* Return false when fail to represent the data dependence as a
3110 distance vector. INIT_B is set to true when a component has been
3111 added to the distance vector DIST_V. INDEX_CARRY is then set to
3112 the index in DIST_V that carries the dependence. */
3115 build_classic_dist_vector_1 (struct data_dependence_relation
*ddr
,
3116 struct data_reference
*ddr_a
,
3117 struct data_reference
*ddr_b
,
3118 lambda_vector dist_v
, bool *init_b
,
3122 lambda_vector init_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3124 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3126 tree access_fn_a
, access_fn_b
;
3127 struct subscript
*subscript
= DDR_SUBSCRIPT (ddr
, i
);
3129 if (chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
3131 non_affine_dependence_relation (ddr
);
3135 access_fn_a
= DR_ACCESS_FN (ddr_a
, i
);
3136 access_fn_b
= DR_ACCESS_FN (ddr_b
, i
);
3138 if (TREE_CODE (access_fn_a
) == POLYNOMIAL_CHREC
3139 && TREE_CODE (access_fn_b
) == POLYNOMIAL_CHREC
)
3142 int var_a
= CHREC_VARIABLE (access_fn_a
);
3143 int var_b
= CHREC_VARIABLE (access_fn_b
);
3146 || chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
3148 non_affine_dependence_relation (ddr
);
3152 dist
= int_cst_value (SUB_DISTANCE (subscript
));
3153 index
= index_in_loop_nest (var_a
, DDR_LOOP_NEST (ddr
));
3154 *index_carry
= MIN (index
, *index_carry
);
3156 /* This is the subscript coupling test. If we have already
3157 recorded a distance for this loop (a distance coming from
3158 another subscript), it should be the same. For example,
3159 in the following code, there is no dependence:
3166 if (init_v
[index
] != 0 && dist_v
[index
] != dist
)
3168 finalize_ddr_dependent (ddr
, chrec_known
);
3172 dist_v
[index
] = dist
;
3176 else if (!operand_equal_p (access_fn_a
, access_fn_b
, 0))
3178 /* This can be for example an affine vs. constant dependence
3179 (T[i] vs. T[3]) that is not an affine dependence and is
3180 not representable as a distance vector. */
3181 non_affine_dependence_relation (ddr
);
3189 /* Return true when the DDR contains only constant access functions. */
3192 constant_access_functions (const struct data_dependence_relation
*ddr
)
3196 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3197 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr
), i
))
3198 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr
), i
)))
3204 /* Helper function for the case where DDR_A and DDR_B are the same
3205 multivariate access function with a constant step. For an example
3209 add_multivariate_self_dist (struct data_dependence_relation
*ddr
, tree c_2
)
3212 tree c_1
= CHREC_LEFT (c_2
);
3213 tree c_0
= CHREC_LEFT (c_1
);
3214 lambda_vector dist_v
;
3217 /* Polynomials with more than 2 variables are not handled yet. When
3218 the evolution steps are parameters, it is not possible to
3219 represent the dependence using classical distance vectors. */
3220 if (TREE_CODE (c_0
) != INTEGER_CST
3221 || TREE_CODE (CHREC_RIGHT (c_1
)) != INTEGER_CST
3222 || TREE_CODE (CHREC_RIGHT (c_2
)) != INTEGER_CST
)
3224 DDR_AFFINE_P (ddr
) = false;
3228 x_2
= index_in_loop_nest (CHREC_VARIABLE (c_2
), DDR_LOOP_NEST (ddr
));
3229 x_1
= index_in_loop_nest (CHREC_VARIABLE (c_1
), DDR_LOOP_NEST (ddr
));
3231 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3232 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3233 v1
= int_cst_value (CHREC_RIGHT (c_1
));
3234 v2
= int_cst_value (CHREC_RIGHT (c_2
));
3247 save_dist_v (ddr
, dist_v
);
3249 add_outer_distances (ddr
, dist_v
, x_1
);
3252 /* Helper function for the case where DDR_A and DDR_B are the same
3253 access functions. */
3256 add_other_self_distances (struct data_dependence_relation
*ddr
)
3258 lambda_vector dist_v
;
3260 int index_carry
= DDR_NB_LOOPS (ddr
);
3262 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3264 tree access_fun
= DR_ACCESS_FN (DDR_A (ddr
), i
);
3266 if (TREE_CODE (access_fun
) == POLYNOMIAL_CHREC
)
3268 if (!evolution_function_is_univariate_p (access_fun
))
3270 if (DDR_NUM_SUBSCRIPTS (ddr
) != 1)
3272 DDR_ARE_DEPENDENT (ddr
) = chrec_dont_know
;
3276 access_fun
= DR_ACCESS_FN (DDR_A (ddr
), 0);
3278 if (TREE_CODE (CHREC_LEFT (access_fun
)) == POLYNOMIAL_CHREC
)
3279 add_multivariate_self_dist (ddr
, access_fun
);
3281 /* The evolution step is not constant: it varies in
3282 the outer loop, so this cannot be represented by a
3283 distance vector. For example in pr34635.c the
3284 evolution is {0, +, {0, +, 4}_1}_2. */
3285 DDR_AFFINE_P (ddr
) = false;
3290 index_carry
= MIN (index_carry
,
3291 index_in_loop_nest (CHREC_VARIABLE (access_fun
),
3292 DDR_LOOP_NEST (ddr
)));
3296 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3297 add_outer_distances (ddr
, dist_v
, index_carry
);
3301 insert_innermost_unit_dist_vector (struct data_dependence_relation
*ddr
)
3303 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3305 dist_v
[DDR_INNER_LOOP (ddr
)] = 1;
3306 save_dist_v (ddr
, dist_v
);
3309 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3310 is the case for example when access functions are the same and
3311 equal to a constant, as in:
3318 in which case the distance vectors are (0) and (1). */
3321 add_distance_for_zero_overlaps (struct data_dependence_relation
*ddr
)
3325 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3327 subscript_p sub
= DDR_SUBSCRIPT (ddr
, i
);
3328 conflict_function
*ca
= SUB_CONFLICTS_IN_A (sub
);
3329 conflict_function
*cb
= SUB_CONFLICTS_IN_B (sub
);
3331 for (j
= 0; j
< ca
->n
; j
++)
3332 if (affine_function_zero_p (ca
->fns
[j
]))
3334 insert_innermost_unit_dist_vector (ddr
);
3338 for (j
= 0; j
< cb
->n
; j
++)
3339 if (affine_function_zero_p (cb
->fns
[j
]))
3341 insert_innermost_unit_dist_vector (ddr
);
3347 /* Compute the classic per loop distance vector. DDR is the data
3348 dependence relation to build a vector from. Return false when fail
3349 to represent the data dependence as a distance vector. */
3352 build_classic_dist_vector (struct data_dependence_relation
*ddr
,
3353 struct loop
*loop_nest
)
3355 bool init_b
= false;
3356 int index_carry
= DDR_NB_LOOPS (ddr
);
3357 lambda_vector dist_v
;
3359 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
3362 if (same_access_functions (ddr
))
3364 /* Save the 0 vector. */
3365 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3366 save_dist_v (ddr
, dist_v
);
3368 if (constant_access_functions (ddr
))
3369 add_distance_for_zero_overlaps (ddr
);
3371 if (DDR_NB_LOOPS (ddr
) > 1)
3372 add_other_self_distances (ddr
);
3377 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3378 if (!build_classic_dist_vector_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
),
3379 dist_v
, &init_b
, &index_carry
))
3382 /* Save the distance vector if we initialized one. */
3385 /* Verify a basic constraint: classic distance vectors should
3386 always be lexicographically positive.
3388 Data references are collected in the order of execution of
3389 the program, thus for the following loop
3391 | for (i = 1; i < 100; i++)
3392 | for (j = 1; j < 100; j++)
3394 | t = T[j+1][i-1]; // A
3395 | T[j][i] = t + 2; // B
3398 references are collected following the direction of the wind:
3399 A then B. The data dependence tests are performed also
3400 following this order, such that we're looking at the distance
3401 separating the elements accessed by A from the elements later
3402 accessed by B. But in this example, the distance returned by
3403 test_dep (A, B) is lexicographically negative (-1, 1), that
3404 means that the access A occurs later than B with respect to
3405 the outer loop, ie. we're actually looking upwind. In this
3406 case we solve test_dep (B, A) looking downwind to the
3407 lexicographically positive solution, that returns the
3408 distance vector (1, -1). */
3409 if (!lambda_vector_lexico_pos (dist_v
, DDR_NB_LOOPS (ddr
)))
3411 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3412 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3415 compute_subscript_distance (ddr
);
3416 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3417 save_v
, &init_b
, &index_carry
))
3419 save_dist_v (ddr
, save_v
);
3420 DDR_REVERSED_P (ddr
) = true;
3422 /* In this case there is a dependence forward for all the
3425 | for (k = 1; k < 100; k++)
3426 | for (i = 1; i < 100; i++)
3427 | for (j = 1; j < 100; j++)
3429 | t = T[j+1][i-1]; // A
3430 | T[j][i] = t + 2; // B
3438 if (DDR_NB_LOOPS (ddr
) > 1)
3440 add_outer_distances (ddr
, save_v
, index_carry
);
3441 add_outer_distances (ddr
, dist_v
, index_carry
);
3446 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3447 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3449 if (DDR_NB_LOOPS (ddr
) > 1)
3451 lambda_vector opposite_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3453 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
),
3454 DDR_A (ddr
), loop_nest
))
3456 compute_subscript_distance (ddr
);
3457 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3458 opposite_v
, &init_b
,
3462 save_dist_v (ddr
, save_v
);
3463 add_outer_distances (ddr
, dist_v
, index_carry
);
3464 add_outer_distances (ddr
, opposite_v
, index_carry
);
3467 save_dist_v (ddr
, save_v
);
3472 /* There is a distance of 1 on all the outer loops: Example:
3473 there is a dependence of distance 1 on loop_1 for the array A.
3479 add_outer_distances (ddr
, dist_v
,
3480 lambda_vector_first_nz (dist_v
,
3481 DDR_NB_LOOPS (ddr
), 0));
3484 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3488 fprintf (dump_file
, "(build_classic_dist_vector\n");
3489 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3491 fprintf (dump_file
, " dist_vector = (");
3492 print_lambda_vector (dump_file
, DDR_DIST_VECT (ddr
, i
),
3493 DDR_NB_LOOPS (ddr
));
3494 fprintf (dump_file
, " )\n");
3496 fprintf (dump_file
, ")\n");
3502 /* Return the direction for a given distance.
3503 FIXME: Computing dir this way is suboptimal, since dir can catch
3504 cases that dist is unable to represent. */
3506 static inline enum data_dependence_direction
3507 dir_from_dist (int dist
)
3510 return dir_positive
;
3512 return dir_negative
;
3517 /* Compute the classic per loop direction vector. DDR is the data
3518 dependence relation to build a vector from. */
3521 build_classic_dir_vector (struct data_dependence_relation
*ddr
)
3524 lambda_vector dist_v
;
3526 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
3528 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3530 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3531 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3533 save_dir_v (ddr
, dir_v
);
3537 /* Helper function. Returns true when there is a dependence between
3538 data references DRA and DRB. */
3541 subscript_dependence_tester_1 (struct data_dependence_relation
*ddr
,
3542 struct data_reference
*dra
,
3543 struct data_reference
*drb
,
3544 struct loop
*loop_nest
)
3547 tree last_conflicts
;
3548 struct subscript
*subscript
;
3549 tree res
= NULL_TREE
;
3551 for (i
= 0; DDR_SUBSCRIPTS (ddr
).iterate (i
, &subscript
); i
++)
3553 conflict_function
*overlaps_a
, *overlaps_b
;
3555 analyze_overlapping_iterations (DR_ACCESS_FN (dra
, i
),
3556 DR_ACCESS_FN (drb
, i
),
3557 &overlaps_a
, &overlaps_b
,
3558 &last_conflicts
, loop_nest
);
3560 if (SUB_CONFLICTS_IN_A (subscript
))
3561 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
3562 if (SUB_CONFLICTS_IN_B (subscript
))
3563 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
3565 SUB_CONFLICTS_IN_A (subscript
) = overlaps_a
;
3566 SUB_CONFLICTS_IN_B (subscript
) = overlaps_b
;
3567 SUB_LAST_CONFLICT (subscript
) = last_conflicts
;
3569 /* If there is any undetermined conflict function we have to
3570 give a conservative answer in case we cannot prove that
3571 no dependence exists when analyzing another subscript. */
3572 if (CF_NOT_KNOWN_P (overlaps_a
)
3573 || CF_NOT_KNOWN_P (overlaps_b
))
3575 res
= chrec_dont_know
;
3579 /* When there is a subscript with no dependence we can stop. */
3580 else if (CF_NO_DEPENDENCE_P (overlaps_a
)
3581 || CF_NO_DEPENDENCE_P (overlaps_b
))
3588 if (res
== NULL_TREE
)
3591 if (res
== chrec_known
)
3592 dependence_stats
.num_dependence_independent
++;
3594 dependence_stats
.num_dependence_undetermined
++;
3595 finalize_ddr_dependent (ddr
, res
);
3599 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3602 subscript_dependence_tester (struct data_dependence_relation
*ddr
,
3603 struct loop
*loop_nest
)
3605 if (subscript_dependence_tester_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
), loop_nest
))
3606 dependence_stats
.num_dependence_dependent
++;
3608 compute_subscript_distance (ddr
);
3609 if (build_classic_dist_vector (ddr
, loop_nest
))
3610 build_classic_dir_vector (ddr
);
3613 /* Returns true when all the access functions of A are affine or
3614 constant with respect to LOOP_NEST. */
3617 access_functions_are_affine_or_constant_p (const struct data_reference
*a
,
3618 const struct loop
*loop_nest
)
3621 vec
<tree
> fns
= DR_ACCESS_FNS (a
);
3624 FOR_EACH_VEC_ELT (fns
, i
, t
)
3625 if (!evolution_function_is_invariant_p (t
, loop_nest
->num
)
3626 && !evolution_function_is_affine_multivariate_p (t
, loop_nest
->num
))
3632 /* Initializes an equation for an OMEGA problem using the information
3633 contained in the ACCESS_FUN. Returns true when the operation
3636 PB is the omega constraint system.
3637 EQ is the number of the equation to be initialized.
3638 OFFSET is used for shifting the variables names in the constraints:
3639 a constrain is composed of 2 * the number of variables surrounding
3640 dependence accesses. OFFSET is set either to 0 for the first n variables,
3641 then it is set to n.
3642 ACCESS_FUN is expected to be an affine chrec. */
3645 init_omega_eq_with_af (omega_pb pb
, unsigned eq
,
3646 unsigned int offset
, tree access_fun
,
3647 struct data_dependence_relation
*ddr
)
3649 switch (TREE_CODE (access_fun
))
3651 case POLYNOMIAL_CHREC
:
3653 tree left
= CHREC_LEFT (access_fun
);
3654 tree right
= CHREC_RIGHT (access_fun
);
3655 int var
= CHREC_VARIABLE (access_fun
);
3658 if (TREE_CODE (right
) != INTEGER_CST
)
3661 var_idx
= index_in_loop_nest (var
, DDR_LOOP_NEST (ddr
));
3662 pb
->eqs
[eq
].coef
[offset
+ var_idx
+ 1] = int_cst_value (right
);
3664 /* Compute the innermost loop index. */
3665 DDR_INNER_LOOP (ddr
) = MAX (DDR_INNER_LOOP (ddr
), var_idx
);
3668 pb
->eqs
[eq
].coef
[var_idx
+ DDR_NB_LOOPS (ddr
) + 1]
3669 += int_cst_value (right
);
3671 switch (TREE_CODE (left
))
3673 case POLYNOMIAL_CHREC
:
3674 return init_omega_eq_with_af (pb
, eq
, offset
, left
, ddr
);
3677 pb
->eqs
[eq
].coef
[0] += int_cst_value (left
);
3686 pb
->eqs
[eq
].coef
[0] += int_cst_value (access_fun
);
3694 /* As explained in the comments preceding init_omega_for_ddr, we have
3695 to set up a system for each loop level, setting outer loops
3696 variation to zero, and current loop variation to positive or zero.
3697 Save each lexico positive distance vector. */
3700 omega_extract_distance_vectors (omega_pb pb
,
3701 struct data_dependence_relation
*ddr
)
3705 struct loop
*loopi
, *loopj
;
3706 enum omega_result res
;
3708 /* Set a new problem for each loop in the nest. The basis is the
3709 problem that we have initialized until now. On top of this we
3710 add new constraints. */
3711 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3712 && DDR_LOOP_NEST (ddr
).iterate (i
, &loopi
); i
++)
3715 omega_pb copy
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
),
3716 DDR_NB_LOOPS (ddr
));
3718 omega_copy_problem (copy
, pb
);
3720 /* For all the outer loops "loop_j", add "dj = 0". */
3721 for (j
= 0; j
< i
&& DDR_LOOP_NEST (ddr
).iterate (j
, &loopj
); j
++)
3723 eq
= omega_add_zero_eq (copy
, omega_black
);
3724 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3727 /* For "loop_i", add "0 <= di". */
3728 geq
= omega_add_zero_geq (copy
, omega_black
);
3729 copy
->geqs
[geq
].coef
[i
+ 1] = 1;
3731 /* Reduce the constraint system, and test that the current
3732 problem is feasible. */
3733 res
= omega_simplify_problem (copy
);
3734 if (res
== omega_false
3735 || res
== omega_unknown
3736 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3739 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3740 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3742 dist
= copy
->subs
[eq
].coef
[0];
3748 /* Reinitialize problem... */
3749 omega_copy_problem (copy
, pb
);
3750 for (j
= 0; j
< i
&& DDR_LOOP_NEST (ddr
).iterate (j
, &loopj
); j
++)
3752 eq
= omega_add_zero_eq (copy
, omega_black
);
3753 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3756 /* ..., but this time "di = 1". */
3757 eq
= omega_add_zero_eq (copy
, omega_black
);
3758 copy
->eqs
[eq
].coef
[i
+ 1] = 1;
3759 copy
->eqs
[eq
].coef
[0] = -1;
3761 res
= omega_simplify_problem (copy
);
3762 if (res
== omega_false
3763 || res
== omega_unknown
3764 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3767 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3768 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3770 dist
= copy
->subs
[eq
].coef
[0];
3776 /* Save the lexicographically positive distance vector. */
3779 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3780 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3784 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3785 if (copy
->subs
[eq
].key
> 0)
3787 dist
= copy
->subs
[eq
].coef
[0];
3788 dist_v
[copy
->subs
[eq
].key
- 1] = dist
;
3791 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3792 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3794 save_dist_v (ddr
, dist_v
);
3795 save_dir_v (ddr
, dir_v
);
3799 omega_free_problem (copy
);
3803 /* This is called for each subscript of a tuple of data references:
3804 insert an equality for representing the conflicts. */
3807 omega_setup_subscript (tree access_fun_a
, tree access_fun_b
,
3808 struct data_dependence_relation
*ddr
,
3809 omega_pb pb
, bool *maybe_dependent
)
3812 tree type
= signed_type_for_types (TREE_TYPE (access_fun_a
),
3813 TREE_TYPE (access_fun_b
));
3814 tree fun_a
= chrec_convert (type
, access_fun_a
, NULL
);
3815 tree fun_b
= chrec_convert (type
, access_fun_b
, NULL
);
3816 tree difference
= chrec_fold_minus (type
, fun_a
, fun_b
);
3819 /* When the fun_a - fun_b is not constant, the dependence is not
3820 captured by the classic distance vector representation. */
3821 if (TREE_CODE (difference
) != INTEGER_CST
)
3825 if (ziv_subscript_p (fun_a
, fun_b
) && !integer_zerop (difference
))
3827 /* There is no dependence. */
3828 *maybe_dependent
= false;
3832 minus_one
= build_int_cst (type
, -1);
3833 fun_b
= chrec_fold_multiply (type
, fun_b
, minus_one
);
3835 eq
= omega_add_zero_eq (pb
, omega_black
);
3836 if (!init_omega_eq_with_af (pb
, eq
, DDR_NB_LOOPS (ddr
), fun_a
, ddr
)
3837 || !init_omega_eq_with_af (pb
, eq
, 0, fun_b
, ddr
))
3838 /* There is probably a dependence, but the system of
3839 constraints cannot be built: answer "don't know". */
3843 if (DDR_NB_LOOPS (ddr
) != 0 && pb
->eqs
[eq
].coef
[0]
3844 && !int_divides_p (lambda_vector_gcd
3845 ((lambda_vector
) &(pb
->eqs
[eq
].coef
[1]),
3846 2 * DDR_NB_LOOPS (ddr
)),
3847 pb
->eqs
[eq
].coef
[0]))
3849 /* There is no dependence. */
3850 *maybe_dependent
= false;
3857 /* Helper function, same as init_omega_for_ddr but specialized for
3858 data references A and B. */
3861 init_omega_for_ddr_1 (struct data_reference
*dra
, struct data_reference
*drb
,
3862 struct data_dependence_relation
*ddr
,
3863 omega_pb pb
, bool *maybe_dependent
)
3868 unsigned nb_loops
= DDR_NB_LOOPS (ddr
);
3870 /* Insert an equality per subscript. */
3871 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3873 if (!omega_setup_subscript (DR_ACCESS_FN (dra
, i
), DR_ACCESS_FN (drb
, i
),
3874 ddr
, pb
, maybe_dependent
))
3876 else if (*maybe_dependent
== false)
3878 /* There is no dependence. */
3879 DDR_ARE_DEPENDENT (ddr
) = chrec_known
;
3884 /* Insert inequalities: constraints corresponding to the iteration
3885 domain, i.e. the loops surrounding the references "loop_x" and
3886 the distance variables "dx". The layout of the OMEGA
3887 representation is as follows:
3888 - coef[0] is the constant
3889 - coef[1..nb_loops] are the protected variables that will not be
3890 removed by the solver: the "dx"
3891 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3893 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3894 && DDR_LOOP_NEST (ddr
).iterate (i
, &loopi
); i
++)
3896 HOST_WIDE_INT nbi
= max_stmt_executions_int (loopi
);
3899 ineq
= omega_add_zero_geq (pb
, omega_black
);
3900 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3902 /* 0 <= loop_x + dx */
3903 ineq
= omega_add_zero_geq (pb
, omega_black
);
3904 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3905 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3909 /* loop_x <= nb_iters */
3910 ineq
= omega_add_zero_geq (pb
, omega_black
);
3911 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3912 pb
->geqs
[ineq
].coef
[0] = nbi
;
3914 /* loop_x + dx <= nb_iters */
3915 ineq
= omega_add_zero_geq (pb
, omega_black
);
3916 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3917 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3918 pb
->geqs
[ineq
].coef
[0] = nbi
;
3920 /* A step "dx" bigger than nb_iters is not feasible, so
3921 add "0 <= nb_iters + dx", */
3922 ineq
= omega_add_zero_geq (pb
, omega_black
);
3923 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3924 pb
->geqs
[ineq
].coef
[0] = nbi
;
3925 /* and "dx <= nb_iters". */
3926 ineq
= omega_add_zero_geq (pb
, omega_black
);
3927 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3928 pb
->geqs
[ineq
].coef
[0] = nbi
;
3932 omega_extract_distance_vectors (pb
, ddr
);
3937 /* Sets up the Omega dependence problem for the data dependence
3938 relation DDR. Returns false when the constraint system cannot be
3939 built, ie. when the test answers "don't know". Returns true
3940 otherwise, and when independence has been proved (using one of the
3941 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3942 set MAYBE_DEPENDENT to true.
3944 Example: for setting up the dependence system corresponding to the
3945 conflicting accesses
3950 | ... A[2*j, 2*(i + j)]
3954 the following constraints come from the iteration domain:
3961 where di, dj are the distance variables. The constraints
3962 representing the conflicting elements are:
3965 i + 1 = 2 * (i + di + j + dj)
3967 For asking that the resulting distance vector (di, dj) be
3968 lexicographically positive, we insert the constraint "di >= 0". If
3969 "di = 0" in the solution, we fix that component to zero, and we
3970 look at the inner loops: we set a new problem where all the outer
3971 loop distances are zero, and fix this inner component to be
3972 positive. When one of the components is positive, we save that
3973 distance, and set a new problem where the distance on this loop is
3974 zero, searching for other distances in the inner loops. Here is
3975 the classic example that illustrates that we have to set for each
3976 inner loop a new problem:
3984 we have to save two distances (1, 0) and (0, 1).
3986 Given two array references, refA and refB, we have to set the
3987 dependence problem twice, refA vs. refB and refB vs. refA, and we
3988 cannot do a single test, as refB might occur before refA in the
3989 inner loops, and the contrary when considering outer loops: ex.
3994 | T[{1,+,1}_2][{1,+,1}_1] // refA
3995 | T[{2,+,1}_2][{0,+,1}_1] // refB
4000 refB touches the elements in T before refA, and thus for the same
4001 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
4002 but for successive loop_0 iterations, we have (1, -1, 1)
4004 The Omega solver expects the distance variables ("di" in the
4005 previous example) to come first in the constraint system (as
4006 variables to be protected, or "safe" variables), the constraint
4007 system is built using the following layout:
4009 "cst | distance vars | index vars".
4013 init_omega_for_ddr (struct data_dependence_relation
*ddr
,
4014 bool *maybe_dependent
)
4019 *maybe_dependent
= true;
4021 if (same_access_functions (ddr
))
4024 lambda_vector dir_v
;
4026 /* Save the 0 vector. */
4027 save_dist_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
4028 dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4029 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
4030 dir_v
[j
] = dir_equal
;
4031 save_dir_v (ddr
, dir_v
);
4033 /* Save the dependences carried by outer loops. */
4034 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
4035 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
4037 omega_free_problem (pb
);
4041 /* Omega expects the protected variables (those that have to be kept
4042 after elimination) to appear first in the constraint system.
4043 These variables are the distance variables. In the following
4044 initialization we declare NB_LOOPS safe variables, and the total
4045 number of variables for the constraint system is 2*NB_LOOPS. */
4046 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
4047 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
4049 omega_free_problem (pb
);
4051 /* Stop computation if not decidable, or no dependence. */
4052 if (res
== false || *maybe_dependent
== false)
4055 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
4056 res
= init_omega_for_ddr_1 (DDR_B (ddr
), DDR_A (ddr
), ddr
, pb
,
4058 omega_free_problem (pb
);
4063 /* Return true when DDR contains the same information as that stored
4064 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4067 ddr_consistent_p (FILE *file
,
4068 struct data_dependence_relation
*ddr
,
4069 vec
<lambda_vector
> dist_vects
,
4070 vec
<lambda_vector
> dir_vects
)
4074 /* If dump_file is set, output there. */
4075 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4078 if (dist_vects
.length () != DDR_NUM_DIST_VECTS (ddr
))
4080 lambda_vector b_dist_v
;
4081 fprintf (file
, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4082 dist_vects
.length (),
4083 DDR_NUM_DIST_VECTS (ddr
));
4085 fprintf (file
, "Banerjee dist vectors:\n");
4086 FOR_EACH_VEC_ELT (dist_vects
, i
, b_dist_v
)
4087 print_lambda_vector (file
, b_dist_v
, DDR_NB_LOOPS (ddr
));
4089 fprintf (file
, "Omega dist vectors:\n");
4090 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
4091 print_lambda_vector (file
, DDR_DIST_VECT (ddr
, i
), DDR_NB_LOOPS (ddr
));
4093 fprintf (file
, "data dependence relation:\n");
4094 dump_data_dependence_relation (file
, ddr
);
4096 fprintf (file
, ")\n");
4100 if (dir_vects
.length () != DDR_NUM_DIR_VECTS (ddr
))
4102 fprintf (file
, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4103 dir_vects
.length (),
4104 DDR_NUM_DIR_VECTS (ddr
));
4108 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
4110 lambda_vector a_dist_v
;
4111 lambda_vector b_dist_v
= DDR_DIST_VECT (ddr
, i
);
4113 /* Distance vectors are not ordered in the same way in the DDR
4114 and in the DIST_VECTS: search for a matching vector. */
4115 FOR_EACH_VEC_ELT (dist_vects
, j
, a_dist_v
)
4116 if (lambda_vector_equal (a_dist_v
, b_dist_v
, DDR_NB_LOOPS (ddr
)))
4119 if (j
== dist_vects
.length ())
4121 fprintf (file
, "\n(Dist vectors from the first dependence analyzer:\n");
4122 print_dist_vectors (file
, dist_vects
, DDR_NB_LOOPS (ddr
));
4123 fprintf (file
, "not found in Omega dist vectors:\n");
4124 print_dist_vectors (file
, DDR_DIST_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
4125 fprintf (file
, "data dependence relation:\n");
4126 dump_data_dependence_relation (file
, ddr
);
4127 fprintf (file
, ")\n");
4131 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
4133 lambda_vector a_dir_v
;
4134 lambda_vector b_dir_v
= DDR_DIR_VECT (ddr
, i
);
4136 /* Direction vectors are not ordered in the same way in the DDR
4137 and in the DIR_VECTS: search for a matching vector. */
4138 FOR_EACH_VEC_ELT (dir_vects
, j
, a_dir_v
)
4139 if (lambda_vector_equal (a_dir_v
, b_dir_v
, DDR_NB_LOOPS (ddr
)))
4142 if (j
== dist_vects
.length ())
4144 fprintf (file
, "\n(Dir vectors from the first dependence analyzer:\n");
4145 print_dir_vectors (file
, dir_vects
, DDR_NB_LOOPS (ddr
));
4146 fprintf (file
, "not found in Omega dir vectors:\n");
4147 print_dir_vectors (file
, DDR_DIR_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
4148 fprintf (file
, "data dependence relation:\n");
4149 dump_data_dependence_relation (file
, ddr
);
4150 fprintf (file
, ")\n");
4157 /* This computes the affine dependence relation between A and B with
4158 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4159 independence between two accesses, while CHREC_DONT_KNOW is used
4160 for representing the unknown relation.
4162 Note that it is possible to stop the computation of the dependence
4163 relation the first time we detect a CHREC_KNOWN element for a given
4167 compute_affine_dependence (struct data_dependence_relation
*ddr
,
4168 struct loop
*loop_nest
)
4170 struct data_reference
*dra
= DDR_A (ddr
);
4171 struct data_reference
*drb
= DDR_B (ddr
);
4173 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4175 fprintf (dump_file
, "(compute_affine_dependence\n");
4176 fprintf (dump_file
, " stmt_a: ");
4177 print_gimple_stmt (dump_file
, DR_STMT (dra
), 0, TDF_SLIM
);
4178 fprintf (dump_file
, " stmt_b: ");
4179 print_gimple_stmt (dump_file
, DR_STMT (drb
), 0, TDF_SLIM
);
4182 /* Analyze only when the dependence relation is not yet known. */
4183 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4185 dependence_stats
.num_dependence_tests
++;
4187 if (access_functions_are_affine_or_constant_p (dra
, loop_nest
)
4188 && access_functions_are_affine_or_constant_p (drb
, loop_nest
))
4190 subscript_dependence_tester (ddr
, loop_nest
);
4192 if (flag_check_data_deps
)
4194 /* Dump the dependences from the first algorithm. */
4195 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4197 fprintf (dump_file
, "\n\nBanerjee Analyzer\n");
4198 dump_data_dependence_relation (dump_file
, ddr
);
4201 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4203 bool maybe_dependent
;
4204 vec
<lambda_vector
> dir_vects
, dist_vects
;
4206 /* Save the result of the first DD analyzer. */
4207 dist_vects
= DDR_DIST_VECTS (ddr
);
4208 dir_vects
= DDR_DIR_VECTS (ddr
);
4210 /* Reset the information. */
4211 DDR_DIST_VECTS (ddr
).create (0);
4212 DDR_DIR_VECTS (ddr
).create (0);
4214 /* Compute the same information using Omega. */
4215 if (!init_omega_for_ddr (ddr
, &maybe_dependent
))
4216 goto csys_dont_know
;
4218 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4220 fprintf (dump_file
, "Omega Analyzer\n");
4221 dump_data_dependence_relation (dump_file
, ddr
);
4224 /* Check that we get the same information. */
4225 if (maybe_dependent
)
4226 gcc_assert (ddr_consistent_p (stderr
, ddr
, dist_vects
,
4232 /* As a last case, if the dependence cannot be determined, or if
4233 the dependence is considered too difficult to determine, answer
4238 dependence_stats
.num_dependence_undetermined
++;
4240 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4242 fprintf (dump_file
, "Data ref a:\n");
4243 dump_data_reference (dump_file
, dra
);
4244 fprintf (dump_file
, "Data ref b:\n");
4245 dump_data_reference (dump_file
, drb
);
4246 fprintf (dump_file
, "affine dependence test not usable: access function not affine or constant.\n");
4248 finalize_ddr_dependent (ddr
, chrec_dont_know
);
4252 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4254 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
4255 fprintf (dump_file
, ") -> no dependence\n");
4256 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
4257 fprintf (dump_file
, ") -> dependence analysis failed\n");
4259 fprintf (dump_file
, ")\n");
4263 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4264 the data references in DATAREFS, in the LOOP_NEST. When
4265 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4266 relations. Return true when successful, i.e. data references number
4267 is small enough to be handled. */
4270 compute_all_dependences (vec
<data_reference_p
> datarefs
,
4271 vec
<ddr_p
> *dependence_relations
,
4272 vec
<loop_p
> loop_nest
,
4273 bool compute_self_and_rr
)
4275 struct data_dependence_relation
*ddr
;
4276 struct data_reference
*a
, *b
;
4279 if ((int) datarefs
.length ()
4280 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS
))
4282 struct data_dependence_relation
*ddr
;
4284 /* Insert a single relation into dependence_relations:
4286 ddr
= initialize_data_dependence_relation (NULL
, NULL
, loop_nest
);
4287 dependence_relations
->safe_push (ddr
);
4291 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
4292 for (j
= i
+ 1; datarefs
.iterate (j
, &b
); j
++)
4293 if (DR_IS_WRITE (a
) || DR_IS_WRITE (b
) || compute_self_and_rr
)
4295 ddr
= initialize_data_dependence_relation (a
, b
, loop_nest
);
4296 dependence_relations
->safe_push (ddr
);
4297 if (loop_nest
.exists ())
4298 compute_affine_dependence (ddr
, loop_nest
[0]);
4301 if (compute_self_and_rr
)
4302 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
4304 ddr
= initialize_data_dependence_relation (a
, a
, loop_nest
);
4305 dependence_relations
->safe_push (ddr
);
4306 if (loop_nest
.exists ())
4307 compute_affine_dependence (ddr
, loop_nest
[0]);
4313 /* Describes a location of a memory reference. */
4315 typedef struct data_ref_loc_d
4317 /* Position of the memory reference. */
4320 /* True if the memory reference is read. */
4325 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4326 true if STMT clobbers memory, false otherwise. */
4329 get_references_in_stmt (gimple stmt
, vec
<data_ref_loc
, va_heap
> *references
)
4331 bool clobbers_memory
= false;
4334 enum gimple_code stmt_code
= gimple_code (stmt
);
4336 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4337 As we cannot model data-references to not spelled out
4338 accesses give up if they may occur. */
4339 if (stmt_code
== GIMPLE_CALL
4340 && !(gimple_call_flags (stmt
) & ECF_CONST
))
4342 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4343 if (gimple_call_internal_p (stmt
)
4344 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
4346 struct loop
*loop
= gimple_bb (stmt
)->loop_father
;
4347 tree uid
= gimple_call_arg (stmt
, 0);
4348 gcc_assert (TREE_CODE (uid
) == SSA_NAME
);
4350 || loop
->simduid
!= SSA_NAME_VAR (uid
))
4351 clobbers_memory
= true;
4354 clobbers_memory
= true;
4356 else if (stmt_code
== GIMPLE_ASM
4357 && (gimple_asm_volatile_p (stmt
) || gimple_vuse (stmt
)))
4358 clobbers_memory
= true;
4360 if (!gimple_vuse (stmt
))
4361 return clobbers_memory
;
4363 if (stmt_code
== GIMPLE_ASSIGN
)
4366 op0
= gimple_assign_lhs_ptr (stmt
);
4367 op1
= gimple_assign_rhs1_ptr (stmt
);
4370 || (REFERENCE_CLASS_P (*op1
)
4371 && (base
= get_base_address (*op1
))
4372 && TREE_CODE (base
) != SSA_NAME
))
4376 references
->safe_push (ref
);
4379 else if (stmt_code
== GIMPLE_CALL
)
4383 op0
= gimple_call_lhs_ptr (stmt
);
4384 n
= gimple_call_num_args (stmt
);
4385 for (i
= 0; i
< n
; i
++)
4387 op1
= gimple_call_arg_ptr (stmt
, i
);
4390 || (REFERENCE_CLASS_P (*op1
) && get_base_address (*op1
)))
4394 references
->safe_push (ref
);
4399 return clobbers_memory
;
4403 || (REFERENCE_CLASS_P (*op0
) && get_base_address (*op0
))))
4406 ref
.is_read
= false;
4407 references
->safe_push (ref
);
4409 return clobbers_memory
;
4412 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4413 reference, returns false, otherwise returns true. NEST is the outermost
4414 loop of the loop nest in which the references should be analyzed. */
4417 find_data_references_in_stmt (struct loop
*nest
, gimple stmt
,
4418 vec
<data_reference_p
> *datarefs
)
4421 stack_vec
<data_ref_loc
, 2> references
;
4424 data_reference_p dr
;
4426 if (get_references_in_stmt (stmt
, &references
))
4429 FOR_EACH_VEC_ELT (references
, i
, ref
)
4431 dr
= create_data_ref (nest
, loop_containing_stmt (stmt
),
4432 *ref
->pos
, stmt
, ref
->is_read
);
4433 gcc_assert (dr
!= NULL
);
4434 datarefs
->safe_push (dr
);
4436 references
.release ();
4440 /* Stores the data references in STMT to DATAREFS. If there is an
4441 unanalyzable reference, returns false, otherwise returns true.
4442 NEST is the outermost loop of the loop nest in which the references
4443 should be instantiated, LOOP is the loop in which the references
4444 should be analyzed. */
4447 graphite_find_data_references_in_stmt (loop_p nest
, loop_p loop
, gimple stmt
,
4448 vec
<data_reference_p
> *datarefs
)
4451 stack_vec
<data_ref_loc
, 2> references
;
4454 data_reference_p dr
;
4456 if (get_references_in_stmt (stmt
, &references
))
4459 FOR_EACH_VEC_ELT (references
, i
, ref
)
4461 dr
= create_data_ref (nest
, loop
, *ref
->pos
, stmt
, ref
->is_read
);
4462 gcc_assert (dr
!= NULL
);
4463 datarefs
->safe_push (dr
);
4466 references
.release ();
4470 /* Search the data references in LOOP, and record the information into
4471 DATAREFS. Returns chrec_dont_know when failing to analyze a
4472 difficult case, returns NULL_TREE otherwise. */
4475 find_data_references_in_bb (struct loop
*loop
, basic_block bb
,
4476 vec
<data_reference_p
> *datarefs
)
4478 gimple_stmt_iterator bsi
;
4480 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4482 gimple stmt
= gsi_stmt (bsi
);
4484 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
4486 struct data_reference
*res
;
4487 res
= XCNEW (struct data_reference
);
4488 datarefs
->safe_push (res
);
4490 return chrec_dont_know
;
4497 /* Search the data references in LOOP, and record the information into
4498 DATAREFS. Returns chrec_dont_know when failing to analyze a
4499 difficult case, returns NULL_TREE otherwise.
4501 TODO: This function should be made smarter so that it can handle address
4502 arithmetic as if they were array accesses, etc. */
4505 find_data_references_in_loop (struct loop
*loop
,
4506 vec
<data_reference_p
> *datarefs
)
4508 basic_block bb
, *bbs
;
4511 bbs
= get_loop_body_in_dom_order (loop
);
4513 for (i
= 0; i
< loop
->num_nodes
; i
++)
4517 if (find_data_references_in_bb (loop
, bb
, datarefs
) == chrec_dont_know
)
4520 return chrec_dont_know
;
4528 /* Recursive helper function. */
4531 find_loop_nest_1 (struct loop
*loop
, vec
<loop_p
> *loop_nest
)
4533 /* Inner loops of the nest should not contain siblings. Example:
4534 when there are two consecutive loops,
4545 the dependence relation cannot be captured by the distance
4550 loop_nest
->safe_push (loop
);
4552 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4556 /* Return false when the LOOP is not well nested. Otherwise return
4557 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4558 contain the loops from the outermost to the innermost, as they will
4559 appear in the classic distance vector. */
4562 find_loop_nest (struct loop
*loop
, vec
<loop_p
> *loop_nest
)
4564 loop_nest
->safe_push (loop
);
4566 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4570 /* Returns true when the data dependences have been computed, false otherwise.
4571 Given a loop nest LOOP, the following vectors are returned:
4572 DATAREFS is initialized to all the array elements contained in this loop,
4573 DEPENDENCE_RELATIONS contains the relations between the data references.
4574 Compute read-read and self relations if
4575 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4578 compute_data_dependences_for_loop (struct loop
*loop
,
4579 bool compute_self_and_read_read_dependences
,
4580 vec
<loop_p
> *loop_nest
,
4581 vec
<data_reference_p
> *datarefs
,
4582 vec
<ddr_p
> *dependence_relations
)
4586 memset (&dependence_stats
, 0, sizeof (dependence_stats
));
4588 /* If the loop nest is not well formed, or one of the data references
4589 is not computable, give up without spending time to compute other
4592 || !find_loop_nest (loop
, loop_nest
)
4593 || find_data_references_in_loop (loop
, datarefs
) == chrec_dont_know
4594 || !compute_all_dependences (*datarefs
, dependence_relations
, *loop_nest
,
4595 compute_self_and_read_read_dependences
))
4598 if (dump_file
&& (dump_flags
& TDF_STATS
))
4600 fprintf (dump_file
, "Dependence tester statistics:\n");
4602 fprintf (dump_file
, "Number of dependence tests: %d\n",
4603 dependence_stats
.num_dependence_tests
);
4604 fprintf (dump_file
, "Number of dependence tests classified dependent: %d\n",
4605 dependence_stats
.num_dependence_dependent
);
4606 fprintf (dump_file
, "Number of dependence tests classified independent: %d\n",
4607 dependence_stats
.num_dependence_independent
);
4608 fprintf (dump_file
, "Number of undetermined dependence tests: %d\n",
4609 dependence_stats
.num_dependence_undetermined
);
4611 fprintf (dump_file
, "Number of subscript tests: %d\n",
4612 dependence_stats
.num_subscript_tests
);
4613 fprintf (dump_file
, "Number of undetermined subscript tests: %d\n",
4614 dependence_stats
.num_subscript_undetermined
);
4615 fprintf (dump_file
, "Number of same subscript function: %d\n",
4616 dependence_stats
.num_same_subscript_function
);
4618 fprintf (dump_file
, "Number of ziv tests: %d\n",
4619 dependence_stats
.num_ziv
);
4620 fprintf (dump_file
, "Number of ziv tests returning dependent: %d\n",
4621 dependence_stats
.num_ziv_dependent
);
4622 fprintf (dump_file
, "Number of ziv tests returning independent: %d\n",
4623 dependence_stats
.num_ziv_independent
);
4624 fprintf (dump_file
, "Number of ziv tests unimplemented: %d\n",
4625 dependence_stats
.num_ziv_unimplemented
);
4627 fprintf (dump_file
, "Number of siv tests: %d\n",
4628 dependence_stats
.num_siv
);
4629 fprintf (dump_file
, "Number of siv tests returning dependent: %d\n",
4630 dependence_stats
.num_siv_dependent
);
4631 fprintf (dump_file
, "Number of siv tests returning independent: %d\n",
4632 dependence_stats
.num_siv_independent
);
4633 fprintf (dump_file
, "Number of siv tests unimplemented: %d\n",
4634 dependence_stats
.num_siv_unimplemented
);
4636 fprintf (dump_file
, "Number of miv tests: %d\n",
4637 dependence_stats
.num_miv
);
4638 fprintf (dump_file
, "Number of miv tests returning dependent: %d\n",
4639 dependence_stats
.num_miv_dependent
);
4640 fprintf (dump_file
, "Number of miv tests returning independent: %d\n",
4641 dependence_stats
.num_miv_independent
);
4642 fprintf (dump_file
, "Number of miv tests unimplemented: %d\n",
4643 dependence_stats
.num_miv_unimplemented
);
4649 /* Returns true when the data dependences for the basic block BB have been
4650 computed, false otherwise.
4651 DATAREFS is initialized to all the array elements contained in this basic
4652 block, DEPENDENCE_RELATIONS contains the relations between the data
4653 references. Compute read-read and self relations if
4654 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4656 compute_data_dependences_for_bb (basic_block bb
,
4657 bool compute_self_and_read_read_dependences
,
4658 vec
<data_reference_p
> *datarefs
,
4659 vec
<ddr_p
> *dependence_relations
)
4661 if (find_data_references_in_bb (NULL
, bb
, datarefs
) == chrec_dont_know
)
4664 return compute_all_dependences (*datarefs
, dependence_relations
, vNULL
,
4665 compute_self_and_read_read_dependences
);
4668 /* Entry point (for testing only). Analyze all the data references
4669 and the dependence relations in LOOP.
4671 The data references are computed first.
4673 A relation on these nodes is represented by a complete graph. Some
4674 of the relations could be of no interest, thus the relations can be
4677 In the following function we compute all the relations. This is
4678 just a first implementation that is here for:
4679 - for showing how to ask for the dependence relations,
4680 - for the debugging the whole dependence graph,
4681 - for the dejagnu testcases and maintenance.
4683 It is possible to ask only for a part of the graph, avoiding to
4684 compute the whole dependence graph. The computed dependences are
4685 stored in a knowledge base (KB) such that later queries don't
4686 recompute the same information. The implementation of this KB is
4687 transparent to the optimizer, and thus the KB can be changed with a
4688 more efficient implementation, or the KB could be disabled. */
4690 analyze_all_data_dependences (struct loop
*loop
)
4693 int nb_data_refs
= 10;
4694 vec
<data_reference_p
> datarefs
;
4695 datarefs
.create (nb_data_refs
);
4696 vec
<ddr_p
> dependence_relations
;
4697 dependence_relations
.create (nb_data_refs
* nb_data_refs
);
4698 vec
<loop_p
> loop_nest
;
4699 loop_nest
.create (3);
4701 /* Compute DDs on the whole function. */
4702 compute_data_dependences_for_loop (loop
, false, &loop_nest
, &datarefs
,
4703 &dependence_relations
);
4707 dump_data_dependence_relations (dump_file
, dependence_relations
);
4708 fprintf (dump_file
, "\n\n");
4710 if (dump_flags
& TDF_DETAILS
)
4711 dump_dist_dir_vectors (dump_file
, dependence_relations
);
4713 if (dump_flags
& TDF_STATS
)
4715 unsigned nb_top_relations
= 0;
4716 unsigned nb_bot_relations
= 0;
4717 unsigned nb_chrec_relations
= 0;
4718 struct data_dependence_relation
*ddr
;
4720 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
4722 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr
)))
4725 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
4729 nb_chrec_relations
++;
4732 gather_stats_on_scev_database ();
4736 loop_nest
.release ();
4737 free_dependence_relations (dependence_relations
);
4738 free_data_refs (datarefs
);
4741 /* Computes all the data dependences and check that the results of
4742 several analyzers are the same. */
4745 tree_check_data_deps (void)
4748 struct loop
*loop_nest
;
4750 FOR_EACH_LOOP (li
, loop_nest
, 0)
4751 analyze_all_data_dependences (loop_nest
);
4754 /* Free the memory used by a data dependence relation DDR. */
4757 free_dependence_relation (struct data_dependence_relation
*ddr
)
4762 if (DDR_SUBSCRIPTS (ddr
).exists ())
4763 free_subscripts (DDR_SUBSCRIPTS (ddr
));
4764 DDR_DIST_VECTS (ddr
).release ();
4765 DDR_DIR_VECTS (ddr
).release ();
4770 /* Free the memory used by the data dependence relations from
4771 DEPENDENCE_RELATIONS. */
4774 free_dependence_relations (vec
<ddr_p
> dependence_relations
)
4777 struct data_dependence_relation
*ddr
;
4779 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
4781 free_dependence_relation (ddr
);
4783 dependence_relations
.release ();
4786 /* Free the memory used by the data references from DATAREFS. */
4789 free_data_refs (vec
<data_reference_p
> datarefs
)
4792 struct data_reference
*dr
;
4794 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
4796 datarefs
.release ();