1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
49 - to define an interface to access this data.
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
62 has an integer solution x = 1 and y = -1.
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
78 #include "coretypes.h"
84 #include "insn-config.h"
86 #include "gimple-pretty-print.h"
88 #include "fold-const.h"
96 #include "internal-fn.h"
97 #include "gimple-iterator.h"
98 #include "tree-ssa-loop-niter.h"
99 #include "tree-ssa-loop.h"
100 #include "tree-ssa.h"
102 #include "tree-data-ref.h"
103 #include "tree-scalar-evolution.h"
104 #include "dumpfile.h"
105 #include "langhooks.h"
106 #include "tree-affine.h"
109 static struct datadep_stats
111 int num_dependence_tests
;
112 int num_dependence_dependent
;
113 int num_dependence_independent
;
114 int num_dependence_undetermined
;
116 int num_subscript_tests
;
117 int num_subscript_undetermined
;
118 int num_same_subscript_function
;
121 int num_ziv_independent
;
122 int num_ziv_dependent
;
123 int num_ziv_unimplemented
;
126 int num_siv_independent
;
127 int num_siv_dependent
;
128 int num_siv_unimplemented
;
131 int num_miv_independent
;
132 int num_miv_dependent
;
133 int num_miv_unimplemented
;
136 static bool subscript_dependence_tester_1 (struct data_dependence_relation
*,
137 struct data_reference
*,
138 struct data_reference
*,
140 /* Returns true iff A divides B. */
143 tree_fold_divides_p (const_tree a
, const_tree b
)
145 gcc_assert (TREE_CODE (a
) == INTEGER_CST
);
146 gcc_assert (TREE_CODE (b
) == INTEGER_CST
);
147 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR
, b
, a
));
150 /* Returns true iff A divides B. */
153 int_divides_p (int a
, int b
)
155 return ((b
% a
) == 0);
160 /* Dump into FILE all the data references from DATAREFS. */
163 dump_data_references (FILE *file
, vec
<data_reference_p
> datarefs
)
166 struct data_reference
*dr
;
168 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
169 dump_data_reference (file
, dr
);
172 /* Unified dump into FILE all the data references from DATAREFS. */
175 debug (vec
<data_reference_p
> &ref
)
177 dump_data_references (stderr
, ref
);
181 debug (vec
<data_reference_p
> *ptr
)
186 fprintf (stderr
, "<nil>\n");
190 /* Dump into STDERR all the data references from DATAREFS. */
193 debug_data_references (vec
<data_reference_p
> datarefs
)
195 dump_data_references (stderr
, datarefs
);
198 /* Print to STDERR the data_reference DR. */
201 debug_data_reference (struct data_reference
*dr
)
203 dump_data_reference (stderr
, dr
);
206 /* Dump function for a DATA_REFERENCE structure. */
209 dump_data_reference (FILE *outf
,
210 struct data_reference
*dr
)
214 fprintf (outf
, "#(Data Ref: \n");
215 fprintf (outf
, "# bb: %d \n", gimple_bb (DR_STMT (dr
))->index
);
216 fprintf (outf
, "# stmt: ");
217 print_gimple_stmt (outf
, DR_STMT (dr
), 0, 0);
218 fprintf (outf
, "# ref: ");
219 print_generic_stmt (outf
, DR_REF (dr
), 0);
220 fprintf (outf
, "# base_object: ");
221 print_generic_stmt (outf
, DR_BASE_OBJECT (dr
), 0);
223 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
225 fprintf (outf
, "# Access function %d: ", i
);
226 print_generic_stmt (outf
, DR_ACCESS_FN (dr
, i
), 0);
228 fprintf (outf
, "#)\n");
231 /* Unified dump function for a DATA_REFERENCE structure. */
234 debug (data_reference
&ref
)
236 dump_data_reference (stderr
, &ref
);
240 debug (data_reference
*ptr
)
245 fprintf (stderr
, "<nil>\n");
249 /* Dumps the affine function described by FN to the file OUTF. */
252 dump_affine_function (FILE *outf
, affine_fn fn
)
257 print_generic_expr (outf
, fn
[0], TDF_SLIM
);
258 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
260 fprintf (outf
, " + ");
261 print_generic_expr (outf
, coef
, TDF_SLIM
);
262 fprintf (outf
, " * x_%u", i
);
266 /* Dumps the conflict function CF to the file OUTF. */
269 dump_conflict_function (FILE *outf
, conflict_function
*cf
)
273 if (cf
->n
== NO_DEPENDENCE
)
274 fprintf (outf
, "no dependence");
275 else if (cf
->n
== NOT_KNOWN
)
276 fprintf (outf
, "not known");
279 for (i
= 0; i
< cf
->n
; i
++)
284 dump_affine_function (outf
, cf
->fns
[i
]);
290 /* Dump function for a SUBSCRIPT structure. */
293 dump_subscript (FILE *outf
, struct subscript
*subscript
)
295 conflict_function
*cf
= SUB_CONFLICTS_IN_A (subscript
);
297 fprintf (outf
, "\n (subscript \n");
298 fprintf (outf
, " iterations_that_access_an_element_twice_in_A: ");
299 dump_conflict_function (outf
, cf
);
300 if (CF_NONTRIVIAL_P (cf
))
302 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
303 fprintf (outf
, "\n last_conflict: ");
304 print_generic_expr (outf
, last_iteration
, 0);
307 cf
= SUB_CONFLICTS_IN_B (subscript
);
308 fprintf (outf
, "\n iterations_that_access_an_element_twice_in_B: ");
309 dump_conflict_function (outf
, cf
);
310 if (CF_NONTRIVIAL_P (cf
))
312 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
313 fprintf (outf
, "\n last_conflict: ");
314 print_generic_expr (outf
, last_iteration
, 0);
317 fprintf (outf
, "\n (Subscript distance: ");
318 print_generic_expr (outf
, SUB_DISTANCE (subscript
), 0);
319 fprintf (outf
, " ))\n");
322 /* Print the classic direction vector DIRV to OUTF. */
325 print_direction_vector (FILE *outf
,
331 for (eq
= 0; eq
< length
; eq
++)
333 enum data_dependence_direction dir
= ((enum data_dependence_direction
)
339 fprintf (outf
, " +");
342 fprintf (outf
, " -");
345 fprintf (outf
, " =");
347 case dir_positive_or_equal
:
348 fprintf (outf
, " +=");
350 case dir_positive_or_negative
:
351 fprintf (outf
, " +-");
353 case dir_negative_or_equal
:
354 fprintf (outf
, " -=");
357 fprintf (outf
, " *");
360 fprintf (outf
, "indep");
364 fprintf (outf
, "\n");
367 /* Print a vector of direction vectors. */
370 print_dir_vectors (FILE *outf
, vec
<lambda_vector
> dir_vects
,
376 FOR_EACH_VEC_ELT (dir_vects
, j
, v
)
377 print_direction_vector (outf
, v
, length
);
380 /* Print out a vector VEC of length N to OUTFILE. */
383 print_lambda_vector (FILE * outfile
, lambda_vector vector
, int n
)
387 for (i
= 0; i
< n
; i
++)
388 fprintf (outfile
, "%3d ", vector
[i
]);
389 fprintf (outfile
, "\n");
392 /* Print a vector of distance vectors. */
395 print_dist_vectors (FILE *outf
, vec
<lambda_vector
> dist_vects
,
401 FOR_EACH_VEC_ELT (dist_vects
, j
, v
)
402 print_lambda_vector (outf
, v
, length
);
405 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
408 dump_data_dependence_relation (FILE *outf
,
409 struct data_dependence_relation
*ddr
)
411 struct data_reference
*dra
, *drb
;
413 fprintf (outf
, "(Data Dep: \n");
415 if (!ddr
|| DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
422 dump_data_reference (outf
, dra
);
424 fprintf (outf
, " (nil)\n");
426 dump_data_reference (outf
, drb
);
428 fprintf (outf
, " (nil)\n");
430 fprintf (outf
, " (don't know)\n)\n");
436 dump_data_reference (outf
, dra
);
437 dump_data_reference (outf
, drb
);
439 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
440 fprintf (outf
, " (no dependence)\n");
442 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
447 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
449 fprintf (outf
, " access_fn_A: ");
450 print_generic_stmt (outf
, DR_ACCESS_FN (dra
, i
), 0);
451 fprintf (outf
, " access_fn_B: ");
452 print_generic_stmt (outf
, DR_ACCESS_FN (drb
, i
), 0);
453 dump_subscript (outf
, DDR_SUBSCRIPT (ddr
, i
));
456 fprintf (outf
, " inner loop index: %d\n", DDR_INNER_LOOP (ddr
));
457 fprintf (outf
, " loop nest: (");
458 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr
), i
, loopi
)
459 fprintf (outf
, "%d ", loopi
->num
);
460 fprintf (outf
, ")\n");
462 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
464 fprintf (outf
, " distance_vector: ");
465 print_lambda_vector (outf
, DDR_DIST_VECT (ddr
, i
),
469 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
471 fprintf (outf
, " direction_vector: ");
472 print_direction_vector (outf
, DDR_DIR_VECT (ddr
, i
),
477 fprintf (outf
, ")\n");
483 debug_data_dependence_relation (struct data_dependence_relation
*ddr
)
485 dump_data_dependence_relation (stderr
, ddr
);
488 /* Dump into FILE all the dependence relations from DDRS. */
491 dump_data_dependence_relations (FILE *file
,
495 struct data_dependence_relation
*ddr
;
497 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
498 dump_data_dependence_relation (file
, ddr
);
502 debug (vec
<ddr_p
> &ref
)
504 dump_data_dependence_relations (stderr
, ref
);
508 debug (vec
<ddr_p
> *ptr
)
513 fprintf (stderr
, "<nil>\n");
517 /* Dump to STDERR all the dependence relations from DDRS. */
520 debug_data_dependence_relations (vec
<ddr_p
> ddrs
)
522 dump_data_dependence_relations (stderr
, ddrs
);
525 /* Dumps the distance and direction vectors in FILE. DDRS contains
526 the dependence relations, and VECT_SIZE is the size of the
527 dependence vectors, or in other words the number of loops in the
531 dump_dist_dir_vectors (FILE *file
, vec
<ddr_p
> ddrs
)
534 struct data_dependence_relation
*ddr
;
537 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
538 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
&& DDR_AFFINE_P (ddr
))
540 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), j
, v
)
542 fprintf (file
, "DISTANCE_V (");
543 print_lambda_vector (file
, v
, DDR_NB_LOOPS (ddr
));
544 fprintf (file
, ")\n");
547 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), j
, v
)
549 fprintf (file
, "DIRECTION_V (");
550 print_direction_vector (file
, v
, DDR_NB_LOOPS (ddr
));
551 fprintf (file
, ")\n");
555 fprintf (file
, "\n\n");
558 /* Dumps the data dependence relations DDRS in FILE. */
561 dump_ddrs (FILE *file
, vec
<ddr_p
> ddrs
)
564 struct data_dependence_relation
*ddr
;
566 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
567 dump_data_dependence_relation (file
, ddr
);
569 fprintf (file
, "\n\n");
573 debug_ddrs (vec
<ddr_p
> ddrs
)
575 dump_ddrs (stderr
, ddrs
);
578 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
579 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
580 constant of type ssizetype, and returns true. If we cannot do this
581 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
585 split_constant_offset_1 (tree type
, tree op0
, enum tree_code code
, tree op1
,
586 tree
*var
, tree
*off
)
590 enum tree_code ocode
= code
;
598 *var
= build_int_cst (type
, 0);
599 *off
= fold_convert (ssizetype
, op0
);
602 case POINTER_PLUS_EXPR
:
607 split_constant_offset (op0
, &var0
, &off0
);
608 split_constant_offset (op1
, &var1
, &off1
);
609 *var
= fold_build2 (code
, type
, var0
, var1
);
610 *off
= size_binop (ocode
, off0
, off1
);
614 if (TREE_CODE (op1
) != INTEGER_CST
)
617 split_constant_offset (op0
, &var0
, &off0
);
618 *var
= fold_build2 (MULT_EXPR
, type
, var0
, op1
);
619 *off
= size_binop (MULT_EXPR
, off0
, fold_convert (ssizetype
, op1
));
625 HOST_WIDE_INT pbitsize
, pbitpos
;
627 int punsignedp
, pvolatilep
;
629 op0
= TREE_OPERAND (op0
, 0);
630 base
= get_inner_reference (op0
, &pbitsize
, &pbitpos
, &poffset
,
631 &pmode
, &punsignedp
, &pvolatilep
, false);
633 if (pbitpos
% BITS_PER_UNIT
!= 0)
635 base
= build_fold_addr_expr (base
);
636 off0
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
640 split_constant_offset (poffset
, &poffset
, &off1
);
641 off0
= size_binop (PLUS_EXPR
, off0
, off1
);
642 if (POINTER_TYPE_P (TREE_TYPE (base
)))
643 base
= fold_build_pointer_plus (base
, poffset
);
645 base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base
), base
,
646 fold_convert (TREE_TYPE (base
), poffset
));
649 var0
= fold_convert (type
, base
);
651 /* If variable length types are involved, punt, otherwise casts
652 might be converted into ARRAY_REFs in gimplify_conversion.
653 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
654 possibly no longer appears in current GIMPLE, might resurface.
655 This perhaps could run
656 if (CONVERT_EXPR_P (var0))
658 gimplify_conversion (&var0);
659 // Attempt to fill in any within var0 found ARRAY_REF's
660 // element size from corresponding op embedded ARRAY_REF,
661 // if unsuccessful, just punt.
663 while (POINTER_TYPE_P (type
))
664 type
= TREE_TYPE (type
);
665 if (int_size_in_bytes (type
) < 0)
675 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0
))
678 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op0
);
679 enum tree_code subcode
;
681 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
684 var0
= gimple_assign_rhs1 (def_stmt
);
685 subcode
= gimple_assign_rhs_code (def_stmt
);
686 var1
= gimple_assign_rhs2 (def_stmt
);
688 return split_constant_offset_1 (type
, var0
, subcode
, var1
, var
, off
);
692 /* We must not introduce undefined overflow, and we must not change the value.
693 Hence we're okay if the inner type doesn't overflow to start with
694 (pointer or signed), the outer type also is an integer or pointer
695 and the outer precision is at least as large as the inner. */
696 tree itype
= TREE_TYPE (op0
);
697 if ((POINTER_TYPE_P (itype
)
698 || (INTEGRAL_TYPE_P (itype
) && TYPE_OVERFLOW_UNDEFINED (itype
)))
699 && TYPE_PRECISION (type
) >= TYPE_PRECISION (itype
)
700 && (POINTER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)))
702 split_constant_offset (op0
, &var0
, off
);
703 *var
= fold_convert (type
, var0
);
714 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
715 will be ssizetype. */
718 split_constant_offset (tree exp
, tree
*var
, tree
*off
)
720 tree type
= TREE_TYPE (exp
), otype
, op0
, op1
, e
, o
;
724 *off
= ssize_int (0);
727 if (tree_is_chrec (exp
)
728 || get_gimple_rhs_class (TREE_CODE (exp
)) == GIMPLE_TERNARY_RHS
)
731 otype
= TREE_TYPE (exp
);
732 code
= TREE_CODE (exp
);
733 extract_ops_from_tree (exp
, &code
, &op0
, &op1
);
734 if (split_constant_offset_1 (otype
, op0
, code
, op1
, &e
, &o
))
736 *var
= fold_convert (type
, e
);
741 /* Returns the address ADDR of an object in a canonical shape (without nop
742 casts, and with type of pointer to the object). */
745 canonicalize_base_object_address (tree addr
)
751 /* The base address may be obtained by casting from integer, in that case
753 if (!POINTER_TYPE_P (TREE_TYPE (addr
)))
756 if (TREE_CODE (addr
) != ADDR_EXPR
)
759 return build_fold_addr_expr (TREE_OPERAND (addr
, 0));
762 /* Analyzes the behavior of the memory reference DR in the innermost loop or
763 basic block that contains it. Returns true if analysis succeed or false
767 dr_analyze_innermost (struct data_reference
*dr
, struct loop
*nest
)
769 gimple
*stmt
= DR_STMT (dr
);
770 struct loop
*loop
= loop_containing_stmt (stmt
);
771 tree ref
= DR_REF (dr
);
772 HOST_WIDE_INT pbitsize
, pbitpos
;
775 int punsignedp
, pvolatilep
;
776 affine_iv base_iv
, offset_iv
;
777 tree init
, dinit
, step
;
778 bool in_loop
= (loop
&& loop
->num
);
780 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
781 fprintf (dump_file
, "analyze_innermost: ");
783 base
= get_inner_reference (ref
, &pbitsize
, &pbitpos
, &poffset
,
784 &pmode
, &punsignedp
, &pvolatilep
, false);
785 gcc_assert (base
!= NULL_TREE
);
787 if (pbitpos
% BITS_PER_UNIT
!= 0)
789 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
790 fprintf (dump_file
, "failed: bit offset alignment.\n");
794 if (TREE_CODE (base
) == MEM_REF
)
796 if (!integer_zerop (TREE_OPERAND (base
, 1)))
798 offset_int moff
= mem_ref_offset (base
);
799 tree mofft
= wide_int_to_tree (sizetype
, moff
);
803 poffset
= size_binop (PLUS_EXPR
, poffset
, mofft
);
805 base
= TREE_OPERAND (base
, 0);
808 base
= build_fold_addr_expr (base
);
812 if (!simple_iv (loop
, loop_containing_stmt (stmt
), base
, &base_iv
,
813 nest
? true : false))
817 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
818 fprintf (dump_file
, "failed: evolution of base is not"
825 base_iv
.step
= ssize_int (0);
826 base_iv
.no_overflow
= true;
833 base_iv
.step
= ssize_int (0);
834 base_iv
.no_overflow
= true;
839 offset_iv
.base
= ssize_int (0);
840 offset_iv
.step
= ssize_int (0);
846 offset_iv
.base
= poffset
;
847 offset_iv
.step
= ssize_int (0);
849 else if (!simple_iv (loop
, loop_containing_stmt (stmt
),
851 nest
? true : false))
855 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
856 fprintf (dump_file
, "failed: evolution of offset is not"
862 offset_iv
.base
= poffset
;
863 offset_iv
.step
= ssize_int (0);
868 init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
869 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
870 init
= size_binop (PLUS_EXPR
, init
, dinit
);
871 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
872 init
= size_binop (PLUS_EXPR
, init
, dinit
);
874 step
= size_binop (PLUS_EXPR
,
875 fold_convert (ssizetype
, base_iv
.step
),
876 fold_convert (ssizetype
, offset_iv
.step
));
878 DR_BASE_ADDRESS (dr
) = canonicalize_base_object_address (base_iv
.base
);
880 DR_OFFSET (dr
) = fold_convert (ssizetype
, offset_iv
.base
);
884 DR_ALIGNED_TO (dr
) = size_int (highest_pow2_factor (offset_iv
.base
));
886 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
887 fprintf (dump_file
, "success.\n");
892 /* Determines the base object and the list of indices of memory reference
893 DR, analyzed in LOOP and instantiated in loop nest NEST. */
896 dr_analyze_indices (struct data_reference
*dr
, loop_p nest
, loop_p loop
)
898 vec
<tree
> access_fns
= vNULL
;
900 tree base
, off
, access_fn
;
901 basic_block before_loop
;
903 /* If analyzing a basic-block there are no indices to analyze
904 and thus no access functions. */
907 DR_BASE_OBJECT (dr
) = DR_REF (dr
);
908 DR_ACCESS_FNS (dr
).create (0);
913 before_loop
= block_before_loop (nest
);
915 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
916 into a two element array with a constant index. The base is
917 then just the immediate underlying object. */
918 if (TREE_CODE (ref
) == REALPART_EXPR
)
920 ref
= TREE_OPERAND (ref
, 0);
921 access_fns
.safe_push (integer_zero_node
);
923 else if (TREE_CODE (ref
) == IMAGPART_EXPR
)
925 ref
= TREE_OPERAND (ref
, 0);
926 access_fns
.safe_push (integer_one_node
);
929 /* Analyze access functions of dimensions we know to be independent. */
930 while (handled_component_p (ref
))
932 if (TREE_CODE (ref
) == ARRAY_REF
)
934 op
= TREE_OPERAND (ref
, 1);
935 access_fn
= analyze_scalar_evolution (loop
, op
);
936 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
937 access_fns
.safe_push (access_fn
);
939 else if (TREE_CODE (ref
) == COMPONENT_REF
940 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref
, 0))) == RECORD_TYPE
)
942 /* For COMPONENT_REFs of records (but not unions!) use the
943 FIELD_DECL offset as constant access function so we can
944 disambiguate a[i].f1 and a[i].f2. */
945 tree off
= component_ref_field_offset (ref
);
946 off
= size_binop (PLUS_EXPR
,
947 size_binop (MULT_EXPR
,
948 fold_convert (bitsizetype
, off
),
949 bitsize_int (BITS_PER_UNIT
)),
950 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref
, 1)));
951 access_fns
.safe_push (off
);
954 /* If we have an unhandled component we could not translate
955 to an access function stop analyzing. We have determined
956 our base object in this case. */
959 ref
= TREE_OPERAND (ref
, 0);
962 /* If the address operand of a MEM_REF base has an evolution in the
963 analyzed nest, add it as an additional independent access-function. */
964 if (TREE_CODE (ref
) == MEM_REF
)
966 op
= TREE_OPERAND (ref
, 0);
967 access_fn
= analyze_scalar_evolution (loop
, op
);
968 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
969 if (TREE_CODE (access_fn
) == POLYNOMIAL_CHREC
)
972 tree memoff
= TREE_OPERAND (ref
, 1);
973 base
= initial_condition (access_fn
);
974 orig_type
= TREE_TYPE (base
);
975 STRIP_USELESS_TYPE_CONVERSION (base
);
976 split_constant_offset (base
, &base
, &off
);
977 STRIP_USELESS_TYPE_CONVERSION (base
);
978 /* Fold the MEM_REF offset into the evolutions initial
979 value to make more bases comparable. */
980 if (!integer_zerop (memoff
))
982 off
= size_binop (PLUS_EXPR
, off
,
983 fold_convert (ssizetype
, memoff
));
984 memoff
= build_int_cst (TREE_TYPE (memoff
), 0);
986 /* Adjust the offset so it is a multiple of the access type
987 size and thus we separate bases that can possibly be used
988 to produce partial overlaps (which the access_fn machinery
991 if (TYPE_SIZE_UNIT (TREE_TYPE (ref
))
992 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref
))) == INTEGER_CST
993 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref
))))
994 rem
= wi::mod_trunc (off
, TYPE_SIZE_UNIT (TREE_TYPE (ref
)), SIGNED
);
996 /* If we can't compute the remainder simply force the initial
997 condition to zero. */
999 off
= wide_int_to_tree (ssizetype
, wi::sub (off
, rem
));
1000 memoff
= wide_int_to_tree (TREE_TYPE (memoff
), rem
);
1001 /* And finally replace the initial condition. */
1002 access_fn
= chrec_replace_initial_condition
1003 (access_fn
, fold_convert (orig_type
, off
));
1004 /* ??? This is still not a suitable base object for
1005 dr_may_alias_p - the base object needs to be an
1006 access that covers the object as whole. With
1007 an evolution in the pointer this cannot be
1009 As a band-aid, mark the access so we can special-case
1010 it in dr_may_alias_p. */
1012 ref
= fold_build2_loc (EXPR_LOCATION (ref
),
1013 MEM_REF
, TREE_TYPE (ref
),
1015 MR_DEPENDENCE_CLIQUE (ref
) = MR_DEPENDENCE_CLIQUE (old
);
1016 MR_DEPENDENCE_BASE (ref
) = MR_DEPENDENCE_BASE (old
);
1017 DR_UNCONSTRAINED_BASE (dr
) = true;
1018 access_fns
.safe_push (access_fn
);
1021 else if (DECL_P (ref
))
1023 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1024 ref
= build2 (MEM_REF
, TREE_TYPE (ref
),
1025 build_fold_addr_expr (ref
),
1026 build_int_cst (reference_alias_ptr_type (ref
), 0));
1029 DR_BASE_OBJECT (dr
) = ref
;
1030 DR_ACCESS_FNS (dr
) = access_fns
;
1033 /* Extracts the alias analysis information from the memory reference DR. */
1036 dr_analyze_alias (struct data_reference
*dr
)
1038 tree ref
= DR_REF (dr
);
1039 tree base
= get_base_address (ref
), addr
;
1041 if (INDIRECT_REF_P (base
)
1042 || TREE_CODE (base
) == MEM_REF
)
1044 addr
= TREE_OPERAND (base
, 0);
1045 if (TREE_CODE (addr
) == SSA_NAME
)
1046 DR_PTR_INFO (dr
) = SSA_NAME_PTR_INFO (addr
);
1050 /* Frees data reference DR. */
1053 free_data_ref (data_reference_p dr
)
1055 DR_ACCESS_FNS (dr
).release ();
1059 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1060 is read if IS_READ is true, write otherwise. Returns the
1061 data_reference description of MEMREF. NEST is the outermost loop
1062 in which the reference should be instantiated, LOOP is the loop in
1063 which the data reference should be analyzed. */
1065 struct data_reference
*
1066 create_data_ref (loop_p nest
, loop_p loop
, tree memref
, gimple
*stmt
,
1069 struct data_reference
*dr
;
1071 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1073 fprintf (dump_file
, "Creating dr for ");
1074 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
1075 fprintf (dump_file
, "\n");
1078 dr
= XCNEW (struct data_reference
);
1079 DR_STMT (dr
) = stmt
;
1080 DR_REF (dr
) = memref
;
1081 DR_IS_READ (dr
) = is_read
;
1083 dr_analyze_innermost (dr
, nest
);
1084 dr_analyze_indices (dr
, nest
, loop
);
1085 dr_analyze_alias (dr
);
1087 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1090 fprintf (dump_file
, "\tbase_address: ");
1091 print_generic_expr (dump_file
, DR_BASE_ADDRESS (dr
), TDF_SLIM
);
1092 fprintf (dump_file
, "\n\toffset from base address: ");
1093 print_generic_expr (dump_file
, DR_OFFSET (dr
), TDF_SLIM
);
1094 fprintf (dump_file
, "\n\tconstant offset from base address: ");
1095 print_generic_expr (dump_file
, DR_INIT (dr
), TDF_SLIM
);
1096 fprintf (dump_file
, "\n\tstep: ");
1097 print_generic_expr (dump_file
, DR_STEP (dr
), TDF_SLIM
);
1098 fprintf (dump_file
, "\n\taligned to: ");
1099 print_generic_expr (dump_file
, DR_ALIGNED_TO (dr
), TDF_SLIM
);
1100 fprintf (dump_file
, "\n\tbase_object: ");
1101 print_generic_expr (dump_file
, DR_BASE_OBJECT (dr
), TDF_SLIM
);
1102 fprintf (dump_file
, "\n");
1103 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
1105 fprintf (dump_file
, "\tAccess function %d: ", i
);
1106 print_generic_stmt (dump_file
, DR_ACCESS_FN (dr
, i
), TDF_SLIM
);
1113 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1116 dr_equal_offsets_p1 (tree offset1
, tree offset2
)
1120 STRIP_NOPS (offset1
);
1121 STRIP_NOPS (offset2
);
1123 if (offset1
== offset2
)
1126 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
1127 || (!BINARY_CLASS_P (offset1
) && !UNARY_CLASS_P (offset1
)))
1130 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 0),
1131 TREE_OPERAND (offset2
, 0));
1133 if (!res
|| !BINARY_CLASS_P (offset1
))
1136 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 1),
1137 TREE_OPERAND (offset2
, 1));
1142 /* Check if DRA and DRB have equal offsets. */
1144 dr_equal_offsets_p (struct data_reference
*dra
,
1145 struct data_reference
*drb
)
1147 tree offset1
, offset2
;
1149 offset1
= DR_OFFSET (dra
);
1150 offset2
= DR_OFFSET (drb
);
1152 return dr_equal_offsets_p1 (offset1
, offset2
);
1155 /* Returns true if FNA == FNB. */
1158 affine_function_equal_p (affine_fn fna
, affine_fn fnb
)
1160 unsigned i
, n
= fna
.length ();
1162 if (n
!= fnb
.length ())
1165 for (i
= 0; i
< n
; i
++)
1166 if (!operand_equal_p (fna
[i
], fnb
[i
], 0))
1172 /* If all the functions in CF are the same, returns one of them,
1173 otherwise returns NULL. */
1176 common_affine_function (conflict_function
*cf
)
1181 if (!CF_NONTRIVIAL_P (cf
))
1182 return affine_fn ();
1186 for (i
= 1; i
< cf
->n
; i
++)
1187 if (!affine_function_equal_p (comm
, cf
->fns
[i
]))
1188 return affine_fn ();
1193 /* Returns the base of the affine function FN. */
1196 affine_function_base (affine_fn fn
)
1201 /* Returns true if FN is a constant. */
1204 affine_function_constant_p (affine_fn fn
)
1209 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
1210 if (!integer_zerop (coef
))
1216 /* Returns true if FN is the zero constant function. */
1219 affine_function_zero_p (affine_fn fn
)
1221 return (integer_zerop (affine_function_base (fn
))
1222 && affine_function_constant_p (fn
));
1225 /* Returns a signed integer type with the largest precision from TA
1229 signed_type_for_types (tree ta
, tree tb
)
1231 if (TYPE_PRECISION (ta
) > TYPE_PRECISION (tb
))
1232 return signed_type_for (ta
);
1234 return signed_type_for (tb
);
1237 /* Applies operation OP on affine functions FNA and FNB, and returns the
1241 affine_fn_op (enum tree_code op
, affine_fn fna
, affine_fn fnb
)
1247 if (fnb
.length () > fna
.length ())
1259 for (i
= 0; i
< n
; i
++)
1261 tree type
= signed_type_for_types (TREE_TYPE (fna
[i
]),
1262 TREE_TYPE (fnb
[i
]));
1263 ret
.quick_push (fold_build2 (op
, type
, fna
[i
], fnb
[i
]));
1266 for (; fna
.iterate (i
, &coef
); i
++)
1267 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1268 coef
, integer_zero_node
));
1269 for (; fnb
.iterate (i
, &coef
); i
++)
1270 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1271 integer_zero_node
, coef
));
1276 /* Returns the sum of affine functions FNA and FNB. */
1279 affine_fn_plus (affine_fn fna
, affine_fn fnb
)
1281 return affine_fn_op (PLUS_EXPR
, fna
, fnb
);
1284 /* Returns the difference of affine functions FNA and FNB. */
1287 affine_fn_minus (affine_fn fna
, affine_fn fnb
)
1289 return affine_fn_op (MINUS_EXPR
, fna
, fnb
);
1292 /* Frees affine function FN. */
1295 affine_fn_free (affine_fn fn
)
1300 /* Determine for each subscript in the data dependence relation DDR
1304 compute_subscript_distance (struct data_dependence_relation
*ddr
)
1306 conflict_function
*cf_a
, *cf_b
;
1307 affine_fn fn_a
, fn_b
, diff
;
1309 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1313 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
1315 struct subscript
*subscript
;
1317 subscript
= DDR_SUBSCRIPT (ddr
, i
);
1318 cf_a
= SUB_CONFLICTS_IN_A (subscript
);
1319 cf_b
= SUB_CONFLICTS_IN_B (subscript
);
1321 fn_a
= common_affine_function (cf_a
);
1322 fn_b
= common_affine_function (cf_b
);
1323 if (!fn_a
.exists () || !fn_b
.exists ())
1325 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1328 diff
= affine_fn_minus (fn_a
, fn_b
);
1330 if (affine_function_constant_p (diff
))
1331 SUB_DISTANCE (subscript
) = affine_function_base (diff
);
1333 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1335 affine_fn_free (diff
);
1340 /* Returns the conflict function for "unknown". */
1342 static conflict_function
*
1343 conflict_fn_not_known (void)
1345 conflict_function
*fn
= XCNEW (conflict_function
);
1351 /* Returns the conflict function for "independent". */
1353 static conflict_function
*
1354 conflict_fn_no_dependence (void)
1356 conflict_function
*fn
= XCNEW (conflict_function
);
1357 fn
->n
= NO_DEPENDENCE
;
1362 /* Returns true if the address of OBJ is invariant in LOOP. */
1365 object_address_invariant_in_loop_p (const struct loop
*loop
, const_tree obj
)
1367 while (handled_component_p (obj
))
1369 if (TREE_CODE (obj
) == ARRAY_REF
)
1371 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1372 need to check the stride and the lower bound of the reference. */
1373 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1375 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 3),
1379 else if (TREE_CODE (obj
) == COMPONENT_REF
)
1381 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1385 obj
= TREE_OPERAND (obj
, 0);
1388 if (!INDIRECT_REF_P (obj
)
1389 && TREE_CODE (obj
) != MEM_REF
)
1392 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 0),
1396 /* Returns false if we can prove that data references A and B do not alias,
1397 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1401 dr_may_alias_p (const struct data_reference
*a
, const struct data_reference
*b
,
1404 tree addr_a
= DR_BASE_OBJECT (a
);
1405 tree addr_b
= DR_BASE_OBJECT (b
);
1407 /* If we are not processing a loop nest but scalar code we
1408 do not need to care about possible cross-iteration dependences
1409 and thus can process the full original reference. Do so,
1410 similar to how loop invariant motion applies extra offset-based
1414 aff_tree off1
, off2
;
1415 widest_int size1
, size2
;
1416 get_inner_reference_aff (DR_REF (a
), &off1
, &size1
);
1417 get_inner_reference_aff (DR_REF (b
), &off2
, &size2
);
1418 aff_combination_scale (&off1
, -1);
1419 aff_combination_add (&off2
, &off1
);
1420 if (aff_comb_cannot_overlap_p (&off2
, size1
, size2
))
1424 if ((TREE_CODE (addr_a
) == MEM_REF
|| TREE_CODE (addr_a
) == TARGET_MEM_REF
)
1425 && (TREE_CODE (addr_b
) == MEM_REF
|| TREE_CODE (addr_b
) == TARGET_MEM_REF
)
1426 && MR_DEPENDENCE_CLIQUE (addr_a
) == MR_DEPENDENCE_CLIQUE (addr_b
)
1427 && MR_DEPENDENCE_BASE (addr_a
) != MR_DEPENDENCE_BASE (addr_b
))
1430 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
1431 do not know the size of the base-object. So we cannot do any
1432 offset/overlap based analysis but have to rely on points-to
1433 information only. */
1434 if (TREE_CODE (addr_a
) == MEM_REF
1435 && (DR_UNCONSTRAINED_BASE (a
)
1436 || TREE_CODE (TREE_OPERAND (addr_a
, 0)) == SSA_NAME
))
1438 /* For true dependences we can apply TBAA. */
1439 if (flag_strict_aliasing
1440 && DR_IS_WRITE (a
) && DR_IS_READ (b
)
1441 && !alias_sets_conflict_p (get_alias_set (DR_REF (a
)),
1442 get_alias_set (DR_REF (b
))))
1444 if (TREE_CODE (addr_b
) == MEM_REF
)
1445 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
1446 TREE_OPERAND (addr_b
, 0));
1448 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
1449 build_fold_addr_expr (addr_b
));
1451 else if (TREE_CODE (addr_b
) == MEM_REF
1452 && (DR_UNCONSTRAINED_BASE (b
)
1453 || TREE_CODE (TREE_OPERAND (addr_b
, 0)) == SSA_NAME
))
1455 /* For true dependences we can apply TBAA. */
1456 if (flag_strict_aliasing
1457 && DR_IS_WRITE (a
) && DR_IS_READ (b
)
1458 && !alias_sets_conflict_p (get_alias_set (DR_REF (a
)),
1459 get_alias_set (DR_REF (b
))))
1461 if (TREE_CODE (addr_a
) == MEM_REF
)
1462 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
1463 TREE_OPERAND (addr_b
, 0));
1465 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a
),
1466 TREE_OPERAND (addr_b
, 0));
1469 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1470 that is being subsetted in the loop nest. */
1471 if (DR_IS_WRITE (a
) && DR_IS_WRITE (b
))
1472 return refs_output_dependent_p (addr_a
, addr_b
);
1473 else if (DR_IS_READ (a
) && DR_IS_WRITE (b
))
1474 return refs_anti_dependent_p (addr_a
, addr_b
);
1475 return refs_may_alias_p (addr_a
, addr_b
);
1478 /* Initialize a data dependence relation between data accesses A and
1479 B. NB_LOOPS is the number of loops surrounding the references: the
1480 size of the classic distance/direction vectors. */
1482 struct data_dependence_relation
*
1483 initialize_data_dependence_relation (struct data_reference
*a
,
1484 struct data_reference
*b
,
1485 vec
<loop_p
> loop_nest
)
1487 struct data_dependence_relation
*res
;
1490 res
= XNEW (struct data_dependence_relation
);
1493 DDR_LOOP_NEST (res
).create (0);
1494 DDR_REVERSED_P (res
) = false;
1495 DDR_SUBSCRIPTS (res
).create (0);
1496 DDR_DIR_VECTS (res
).create (0);
1497 DDR_DIST_VECTS (res
).create (0);
1499 if (a
== NULL
|| b
== NULL
)
1501 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1505 /* If the data references do not alias, then they are independent. */
1506 if (!dr_may_alias_p (a
, b
, loop_nest
.exists ()))
1508 DDR_ARE_DEPENDENT (res
) = chrec_known
;
1512 /* The case where the references are exactly the same. */
1513 if (operand_equal_p (DR_REF (a
), DR_REF (b
), 0))
1515 if (loop_nest
.exists ()
1516 && !object_address_invariant_in_loop_p (loop_nest
[0],
1517 DR_BASE_OBJECT (a
)))
1519 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1522 DDR_AFFINE_P (res
) = true;
1523 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1524 DDR_SUBSCRIPTS (res
).create (DR_NUM_DIMENSIONS (a
));
1525 DDR_LOOP_NEST (res
) = loop_nest
;
1526 DDR_INNER_LOOP (res
) = 0;
1527 DDR_SELF_REFERENCE (res
) = true;
1528 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1530 struct subscript
*subscript
;
1532 subscript
= XNEW (struct subscript
);
1533 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1534 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1535 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1536 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1537 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
1542 /* If the references do not access the same object, we do not know
1543 whether they alias or not. */
1544 if (!operand_equal_p (DR_BASE_OBJECT (a
), DR_BASE_OBJECT (b
), 0))
1546 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1550 /* If the base of the object is not invariant in the loop nest, we cannot
1551 analyze it. TODO -- in fact, it would suffice to record that there may
1552 be arbitrary dependences in the loops where the base object varies. */
1553 if (loop_nest
.exists ()
1554 && !object_address_invariant_in_loop_p (loop_nest
[0],
1555 DR_BASE_OBJECT (a
)))
1557 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1561 /* If the number of dimensions of the access to not agree we can have
1562 a pointer access to a component of the array element type and an
1563 array access while the base-objects are still the same. Punt. */
1564 if (DR_NUM_DIMENSIONS (a
) != DR_NUM_DIMENSIONS (b
))
1566 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1570 DDR_AFFINE_P (res
) = true;
1571 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1572 DDR_SUBSCRIPTS (res
).create (DR_NUM_DIMENSIONS (a
));
1573 DDR_LOOP_NEST (res
) = loop_nest
;
1574 DDR_INNER_LOOP (res
) = 0;
1575 DDR_SELF_REFERENCE (res
) = false;
1577 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1579 struct subscript
*subscript
;
1581 subscript
= XNEW (struct subscript
);
1582 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1583 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1584 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1585 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1586 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
1592 /* Frees memory used by the conflict function F. */
1595 free_conflict_function (conflict_function
*f
)
1599 if (CF_NONTRIVIAL_P (f
))
1601 for (i
= 0; i
< f
->n
; i
++)
1602 affine_fn_free (f
->fns
[i
]);
1607 /* Frees memory used by SUBSCRIPTS. */
1610 free_subscripts (vec
<subscript_p
> subscripts
)
1615 FOR_EACH_VEC_ELT (subscripts
, i
, s
)
1617 free_conflict_function (s
->conflicting_iterations_in_a
);
1618 free_conflict_function (s
->conflicting_iterations_in_b
);
1621 subscripts
.release ();
1624 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1628 finalize_ddr_dependent (struct data_dependence_relation
*ddr
,
1631 DDR_ARE_DEPENDENT (ddr
) = chrec
;
1632 free_subscripts (DDR_SUBSCRIPTS (ddr
));
1633 DDR_SUBSCRIPTS (ddr
).create (0);
1636 /* The dependence relation DDR cannot be represented by a distance
1640 non_affine_dependence_relation (struct data_dependence_relation
*ddr
)
1642 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1643 fprintf (dump_file
, "(Dependence relation cannot be represented by distance vector.) \n");
1645 DDR_AFFINE_P (ddr
) = false;
1650 /* This section contains the classic Banerjee tests. */
1652 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1653 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1656 ziv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1658 return (evolution_function_is_constant_p (chrec_a
)
1659 && evolution_function_is_constant_p (chrec_b
));
1662 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1663 variable, i.e., if the SIV (Single Index Variable) test is true. */
1666 siv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1668 if ((evolution_function_is_constant_p (chrec_a
)
1669 && evolution_function_is_univariate_p (chrec_b
))
1670 || (evolution_function_is_constant_p (chrec_b
)
1671 && evolution_function_is_univariate_p (chrec_a
)))
1674 if (evolution_function_is_univariate_p (chrec_a
)
1675 && evolution_function_is_univariate_p (chrec_b
))
1677 switch (TREE_CODE (chrec_a
))
1679 case POLYNOMIAL_CHREC
:
1680 switch (TREE_CODE (chrec_b
))
1682 case POLYNOMIAL_CHREC
:
1683 if (CHREC_VARIABLE (chrec_a
) != CHREC_VARIABLE (chrec_b
))
1698 /* Creates a conflict function with N dimensions. The affine functions
1699 in each dimension follow. */
1701 static conflict_function
*
1702 conflict_fn (unsigned n
, ...)
1705 conflict_function
*ret
= XCNEW (conflict_function
);
1708 gcc_assert (0 < n
&& n
<= MAX_DIM
);
1712 for (i
= 0; i
< n
; i
++)
1713 ret
->fns
[i
] = va_arg (ap
, affine_fn
);
1719 /* Returns constant affine function with value CST. */
1722 affine_fn_cst (tree cst
)
1726 fn
.quick_push (cst
);
1730 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1733 affine_fn_univar (tree cst
, unsigned dim
, tree coef
)
1736 fn
.create (dim
+ 1);
1739 gcc_assert (dim
> 0);
1740 fn
.quick_push (cst
);
1741 for (i
= 1; i
< dim
; i
++)
1742 fn
.quick_push (integer_zero_node
);
1743 fn
.quick_push (coef
);
1747 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1748 *OVERLAPS_B are initialized to the functions that describe the
1749 relation between the elements accessed twice by CHREC_A and
1750 CHREC_B. For k >= 0, the following property is verified:
1752 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1755 analyze_ziv_subscript (tree chrec_a
,
1757 conflict_function
**overlaps_a
,
1758 conflict_function
**overlaps_b
,
1759 tree
*last_conflicts
)
1761 tree type
, difference
;
1762 dependence_stats
.num_ziv
++;
1764 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1765 fprintf (dump_file
, "(analyze_ziv_subscript \n");
1767 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1768 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1769 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1770 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
1772 switch (TREE_CODE (difference
))
1775 if (integer_zerop (difference
))
1777 /* The difference is equal to zero: the accessed index
1778 overlaps for each iteration in the loop. */
1779 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1780 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1781 *last_conflicts
= chrec_dont_know
;
1782 dependence_stats
.num_ziv_dependent
++;
1786 /* The accesses do not overlap. */
1787 *overlaps_a
= conflict_fn_no_dependence ();
1788 *overlaps_b
= conflict_fn_no_dependence ();
1789 *last_conflicts
= integer_zero_node
;
1790 dependence_stats
.num_ziv_independent
++;
1795 /* We're not sure whether the indexes overlap. For the moment,
1796 conservatively answer "don't know". */
1797 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1798 fprintf (dump_file
, "ziv test failed: difference is non-integer.\n");
1800 *overlaps_a
= conflict_fn_not_known ();
1801 *overlaps_b
= conflict_fn_not_known ();
1802 *last_conflicts
= chrec_dont_know
;
1803 dependence_stats
.num_ziv_unimplemented
++;
1807 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1808 fprintf (dump_file
, ")\n");
1811 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1812 and only if it fits to the int type. If this is not the case, or the
1813 bound on the number of iterations of LOOP could not be derived, returns
1817 max_stmt_executions_tree (struct loop
*loop
)
1821 if (!max_stmt_executions (loop
, &nit
))
1822 return chrec_dont_know
;
1824 if (!wi::fits_to_tree_p (nit
, unsigned_type_node
))
1825 return chrec_dont_know
;
1827 return wide_int_to_tree (unsigned_type_node
, nit
);
1830 /* Determine whether the CHREC is always positive/negative. If the expression
1831 cannot be statically analyzed, return false, otherwise set the answer into
1835 chrec_is_positive (tree chrec
, bool *value
)
1837 bool value0
, value1
, value2
;
1838 tree end_value
, nb_iter
;
1840 switch (TREE_CODE (chrec
))
1842 case POLYNOMIAL_CHREC
:
1843 if (!chrec_is_positive (CHREC_LEFT (chrec
), &value0
)
1844 || !chrec_is_positive (CHREC_RIGHT (chrec
), &value1
))
1847 /* FIXME -- overflows. */
1848 if (value0
== value1
)
1854 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1855 and the proof consists in showing that the sign never
1856 changes during the execution of the loop, from 0 to
1857 loop->nb_iterations. */
1858 if (!evolution_function_is_affine_p (chrec
))
1861 nb_iter
= number_of_latch_executions (get_chrec_loop (chrec
));
1862 if (chrec_contains_undetermined (nb_iter
))
1866 /* TODO -- If the test is after the exit, we may decrease the number of
1867 iterations by one. */
1869 nb_iter
= chrec_fold_minus (type
, nb_iter
, build_int_cst (type
, 1));
1872 end_value
= chrec_apply (CHREC_VARIABLE (chrec
), chrec
, nb_iter
);
1874 if (!chrec_is_positive (end_value
, &value2
))
1878 return value0
== value1
;
1881 switch (tree_int_cst_sgn (chrec
))
1900 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1901 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1902 *OVERLAPS_B are initialized to the functions that describe the
1903 relation between the elements accessed twice by CHREC_A and
1904 CHREC_B. For k >= 0, the following property is verified:
1906 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1909 analyze_siv_subscript_cst_affine (tree chrec_a
,
1911 conflict_function
**overlaps_a
,
1912 conflict_function
**overlaps_b
,
1913 tree
*last_conflicts
)
1915 bool value0
, value1
, value2
;
1916 tree type
, difference
, tmp
;
1918 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1919 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1920 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1921 difference
= chrec_fold_minus (type
, initial_condition (chrec_b
), chrec_a
);
1923 /* Special case overlap in the first iteration. */
1924 if (integer_zerop (difference
))
1926 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1927 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1928 *last_conflicts
= integer_one_node
;
1932 if (!chrec_is_positive (initial_condition (difference
), &value0
))
1934 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1935 fprintf (dump_file
, "siv test failed: chrec is not positive.\n");
1937 dependence_stats
.num_siv_unimplemented
++;
1938 *overlaps_a
= conflict_fn_not_known ();
1939 *overlaps_b
= conflict_fn_not_known ();
1940 *last_conflicts
= chrec_dont_know
;
1945 if (value0
== false)
1947 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value1
))
1949 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1950 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1952 *overlaps_a
= conflict_fn_not_known ();
1953 *overlaps_b
= conflict_fn_not_known ();
1954 *last_conflicts
= chrec_dont_know
;
1955 dependence_stats
.num_siv_unimplemented
++;
1964 chrec_b = {10, +, 1}
1967 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1969 HOST_WIDE_INT numiter
;
1970 struct loop
*loop
= get_chrec_loop (chrec_b
);
1972 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1973 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
,
1974 fold_build1 (ABS_EXPR
, type
, difference
),
1975 CHREC_RIGHT (chrec_b
));
1976 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1977 *last_conflicts
= integer_one_node
;
1980 /* Perform weak-zero siv test to see if overlap is
1981 outside the loop bounds. */
1982 numiter
= max_stmt_executions_int (loop
);
1985 && compare_tree_int (tmp
, numiter
) > 0)
1987 free_conflict_function (*overlaps_a
);
1988 free_conflict_function (*overlaps_b
);
1989 *overlaps_a
= conflict_fn_no_dependence ();
1990 *overlaps_b
= conflict_fn_no_dependence ();
1991 *last_conflicts
= integer_zero_node
;
1992 dependence_stats
.num_siv_independent
++;
1995 dependence_stats
.num_siv_dependent
++;
1999 /* When the step does not divide the difference, there are
2003 *overlaps_a
= conflict_fn_no_dependence ();
2004 *overlaps_b
= conflict_fn_no_dependence ();
2005 *last_conflicts
= integer_zero_node
;
2006 dependence_stats
.num_siv_independent
++;
2015 chrec_b = {10, +, -1}
2017 In this case, chrec_a will not overlap with chrec_b. */
2018 *overlaps_a
= conflict_fn_no_dependence ();
2019 *overlaps_b
= conflict_fn_no_dependence ();
2020 *last_conflicts
= integer_zero_node
;
2021 dependence_stats
.num_siv_independent
++;
2028 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value2
))
2030 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2031 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
2033 *overlaps_a
= conflict_fn_not_known ();
2034 *overlaps_b
= conflict_fn_not_known ();
2035 *last_conflicts
= chrec_dont_know
;
2036 dependence_stats
.num_siv_unimplemented
++;
2041 if (value2
== false)
2045 chrec_b = {10, +, -1}
2047 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
2049 HOST_WIDE_INT numiter
;
2050 struct loop
*loop
= get_chrec_loop (chrec_b
);
2052 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2053 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
, difference
,
2054 CHREC_RIGHT (chrec_b
));
2055 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
2056 *last_conflicts
= integer_one_node
;
2058 /* Perform weak-zero siv test to see if overlap is
2059 outside the loop bounds. */
2060 numiter
= max_stmt_executions_int (loop
);
2063 && compare_tree_int (tmp
, numiter
) > 0)
2065 free_conflict_function (*overlaps_a
);
2066 free_conflict_function (*overlaps_b
);
2067 *overlaps_a
= conflict_fn_no_dependence ();
2068 *overlaps_b
= conflict_fn_no_dependence ();
2069 *last_conflicts
= integer_zero_node
;
2070 dependence_stats
.num_siv_independent
++;
2073 dependence_stats
.num_siv_dependent
++;
2077 /* When the step does not divide the difference, there
2081 *overlaps_a
= conflict_fn_no_dependence ();
2082 *overlaps_b
= conflict_fn_no_dependence ();
2083 *last_conflicts
= integer_zero_node
;
2084 dependence_stats
.num_siv_independent
++;
2094 In this case, chrec_a will not overlap with chrec_b. */
2095 *overlaps_a
= conflict_fn_no_dependence ();
2096 *overlaps_b
= conflict_fn_no_dependence ();
2097 *last_conflicts
= integer_zero_node
;
2098 dependence_stats
.num_siv_independent
++;
2106 /* Helper recursive function for initializing the matrix A. Returns
2107 the initial value of CHREC. */
2110 initialize_matrix_A (lambda_matrix A
, tree chrec
, unsigned index
, int mult
)
2114 switch (TREE_CODE (chrec
))
2116 case POLYNOMIAL_CHREC
:
2117 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec
)) == INTEGER_CST
);
2119 A
[index
][0] = mult
* int_cst_value (CHREC_RIGHT (chrec
));
2120 return initialize_matrix_A (A
, CHREC_LEFT (chrec
), index
+ 1, mult
);
2126 tree op0
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2127 tree op1
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 1), index
, mult
);
2129 return chrec_fold_op (TREE_CODE (chrec
), chrec_type (chrec
), op0
, op1
);
2134 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2135 return chrec_convert (chrec_type (chrec
), op
, NULL
);
2140 /* Handle ~X as -1 - X. */
2141 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2142 return chrec_fold_op (MINUS_EXPR
, chrec_type (chrec
),
2143 build_int_cst (TREE_TYPE (chrec
), -1), op
);
2155 #define FLOOR_DIV(x,y) ((x) / (y))
2157 /* Solves the special case of the Diophantine equation:
2158 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2160 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2161 number of iterations that loops X and Y run. The overlaps will be
2162 constructed as evolutions in dimension DIM. */
2165 compute_overlap_steps_for_affine_univar (int niter
, int step_a
, int step_b
,
2166 affine_fn
*overlaps_a
,
2167 affine_fn
*overlaps_b
,
2168 tree
*last_conflicts
, int dim
)
2170 if (((step_a
> 0 && step_b
> 0)
2171 || (step_a
< 0 && step_b
< 0)))
2173 int step_overlaps_a
, step_overlaps_b
;
2174 int gcd_steps_a_b
, last_conflict
, tau2
;
2176 gcd_steps_a_b
= gcd (step_a
, step_b
);
2177 step_overlaps_a
= step_b
/ gcd_steps_a_b
;
2178 step_overlaps_b
= step_a
/ gcd_steps_a_b
;
2182 tau2
= FLOOR_DIV (niter
, step_overlaps_a
);
2183 tau2
= MIN (tau2
, FLOOR_DIV (niter
, step_overlaps_b
));
2184 last_conflict
= tau2
;
2185 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2188 *last_conflicts
= chrec_dont_know
;
2190 *overlaps_a
= affine_fn_univar (integer_zero_node
, dim
,
2191 build_int_cst (NULL_TREE
,
2193 *overlaps_b
= affine_fn_univar (integer_zero_node
, dim
,
2194 build_int_cst (NULL_TREE
,
2200 *overlaps_a
= affine_fn_cst (integer_zero_node
);
2201 *overlaps_b
= affine_fn_cst (integer_zero_node
);
2202 *last_conflicts
= integer_zero_node
;
2206 /* Solves the special case of a Diophantine equation where CHREC_A is
2207 an affine bivariate function, and CHREC_B is an affine univariate
2208 function. For example,
2210 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2212 has the following overlapping functions:
2214 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2215 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2216 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2218 FORNOW: This is a specialized implementation for a case occurring in
2219 a common benchmark. Implement the general algorithm. */
2222 compute_overlap_steps_for_affine_1_2 (tree chrec_a
, tree chrec_b
,
2223 conflict_function
**overlaps_a
,
2224 conflict_function
**overlaps_b
,
2225 tree
*last_conflicts
)
2227 bool xz_p
, yz_p
, xyz_p
;
2228 int step_x
, step_y
, step_z
;
2229 HOST_WIDE_INT niter_x
, niter_y
, niter_z
, niter
;
2230 affine_fn overlaps_a_xz
, overlaps_b_xz
;
2231 affine_fn overlaps_a_yz
, overlaps_b_yz
;
2232 affine_fn overlaps_a_xyz
, overlaps_b_xyz
;
2233 affine_fn ova1
, ova2
, ovb
;
2234 tree last_conflicts_xz
, last_conflicts_yz
, last_conflicts_xyz
;
2236 step_x
= int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a
)));
2237 step_y
= int_cst_value (CHREC_RIGHT (chrec_a
));
2238 step_z
= int_cst_value (CHREC_RIGHT (chrec_b
));
2240 niter_x
= max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a
)));
2241 niter_y
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
2242 niter_z
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
2244 if (niter_x
< 0 || niter_y
< 0 || niter_z
< 0)
2246 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2247 fprintf (dump_file
, "overlap steps test failed: no iteration counts.\n");
2249 *overlaps_a
= conflict_fn_not_known ();
2250 *overlaps_b
= conflict_fn_not_known ();
2251 *last_conflicts
= chrec_dont_know
;
2255 niter
= MIN (niter_x
, niter_z
);
2256 compute_overlap_steps_for_affine_univar (niter
, step_x
, step_z
,
2259 &last_conflicts_xz
, 1);
2260 niter
= MIN (niter_y
, niter_z
);
2261 compute_overlap_steps_for_affine_univar (niter
, step_y
, step_z
,
2264 &last_conflicts_yz
, 2);
2265 niter
= MIN (niter_x
, niter_z
);
2266 niter
= MIN (niter_y
, niter
);
2267 compute_overlap_steps_for_affine_univar (niter
, step_x
+ step_y
, step_z
,
2270 &last_conflicts_xyz
, 3);
2272 xz_p
= !integer_zerop (last_conflicts_xz
);
2273 yz_p
= !integer_zerop (last_conflicts_yz
);
2274 xyz_p
= !integer_zerop (last_conflicts_xyz
);
2276 if (xz_p
|| yz_p
|| xyz_p
)
2278 ova1
= affine_fn_cst (integer_zero_node
);
2279 ova2
= affine_fn_cst (integer_zero_node
);
2280 ovb
= affine_fn_cst (integer_zero_node
);
2283 affine_fn t0
= ova1
;
2286 ova1
= affine_fn_plus (ova1
, overlaps_a_xz
);
2287 ovb
= affine_fn_plus (ovb
, overlaps_b_xz
);
2288 affine_fn_free (t0
);
2289 affine_fn_free (t2
);
2290 *last_conflicts
= last_conflicts_xz
;
2294 affine_fn t0
= ova2
;
2297 ova2
= affine_fn_plus (ova2
, overlaps_a_yz
);
2298 ovb
= affine_fn_plus (ovb
, overlaps_b_yz
);
2299 affine_fn_free (t0
);
2300 affine_fn_free (t2
);
2301 *last_conflicts
= last_conflicts_yz
;
2305 affine_fn t0
= ova1
;
2306 affine_fn t2
= ova2
;
2309 ova1
= affine_fn_plus (ova1
, overlaps_a_xyz
);
2310 ova2
= affine_fn_plus (ova2
, overlaps_a_xyz
);
2311 ovb
= affine_fn_plus (ovb
, overlaps_b_xyz
);
2312 affine_fn_free (t0
);
2313 affine_fn_free (t2
);
2314 affine_fn_free (t4
);
2315 *last_conflicts
= last_conflicts_xyz
;
2317 *overlaps_a
= conflict_fn (2, ova1
, ova2
);
2318 *overlaps_b
= conflict_fn (1, ovb
);
2322 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2323 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2324 *last_conflicts
= integer_zero_node
;
2327 affine_fn_free (overlaps_a_xz
);
2328 affine_fn_free (overlaps_b_xz
);
2329 affine_fn_free (overlaps_a_yz
);
2330 affine_fn_free (overlaps_b_yz
);
2331 affine_fn_free (overlaps_a_xyz
);
2332 affine_fn_free (overlaps_b_xyz
);
2335 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2338 lambda_vector_copy (lambda_vector vec1
, lambda_vector vec2
,
2341 memcpy (vec2
, vec1
, size
* sizeof (*vec1
));
2344 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2347 lambda_matrix_copy (lambda_matrix mat1
, lambda_matrix mat2
,
2352 for (i
= 0; i
< m
; i
++)
2353 lambda_vector_copy (mat1
[i
], mat2
[i
], n
);
2356 /* Store the N x N identity matrix in MAT. */
2359 lambda_matrix_id (lambda_matrix mat
, int size
)
2363 for (i
= 0; i
< size
; i
++)
2364 for (j
= 0; j
< size
; j
++)
2365 mat
[i
][j
] = (i
== j
) ? 1 : 0;
2368 /* Return the first nonzero element of vector VEC1 between START and N.
2369 We must have START <= N. Returns N if VEC1 is the zero vector. */
2372 lambda_vector_first_nz (lambda_vector vec1
, int n
, int start
)
2375 while (j
< n
&& vec1
[j
] == 0)
2380 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2381 R2 = R2 + CONST1 * R1. */
2384 lambda_matrix_row_add (lambda_matrix mat
, int n
, int r1
, int r2
, int const1
)
2391 for (i
= 0; i
< n
; i
++)
2392 mat
[r2
][i
] += const1
* mat
[r1
][i
];
2395 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2396 and store the result in VEC2. */
2399 lambda_vector_mult_const (lambda_vector vec1
, lambda_vector vec2
,
2400 int size
, int const1
)
2405 lambda_vector_clear (vec2
, size
);
2407 for (i
= 0; i
< size
; i
++)
2408 vec2
[i
] = const1
* vec1
[i
];
2411 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2414 lambda_vector_negate (lambda_vector vec1
, lambda_vector vec2
,
2417 lambda_vector_mult_const (vec1
, vec2
, size
, -1);
2420 /* Negate row R1 of matrix MAT which has N columns. */
2423 lambda_matrix_row_negate (lambda_matrix mat
, int n
, int r1
)
2425 lambda_vector_negate (mat
[r1
], mat
[r1
], n
);
2428 /* Return true if two vectors are equal. */
2431 lambda_vector_equal (lambda_vector vec1
, lambda_vector vec2
, int size
)
2434 for (i
= 0; i
< size
; i
++)
2435 if (vec1
[i
] != vec2
[i
])
2440 /* Given an M x N integer matrix A, this function determines an M x
2441 M unimodular matrix U, and an M x N echelon matrix S such that
2442 "U.A = S". This decomposition is also known as "right Hermite".
2444 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2445 Restructuring Compilers" Utpal Banerjee. */
2448 lambda_matrix_right_hermite (lambda_matrix A
, int m
, int n
,
2449 lambda_matrix S
, lambda_matrix U
)
2453 lambda_matrix_copy (A
, S
, m
, n
);
2454 lambda_matrix_id (U
, m
);
2456 for (j
= 0; j
< n
; j
++)
2458 if (lambda_vector_first_nz (S
[j
], m
, i0
) < m
)
2461 for (i
= m
- 1; i
>= i0
; i
--)
2463 while (S
[i
][j
] != 0)
2465 int sigma
, factor
, a
, b
;
2469 sigma
= (a
* b
< 0) ? -1: 1;
2472 factor
= sigma
* (a
/ b
);
2474 lambda_matrix_row_add (S
, n
, i
, i
-1, -factor
);
2475 std::swap (S
[i
], S
[i
-1]);
2477 lambda_matrix_row_add (U
, m
, i
, i
-1, -factor
);
2478 std::swap (U
[i
], U
[i
-1]);
2485 /* Determines the overlapping elements due to accesses CHREC_A and
2486 CHREC_B, that are affine functions. This function cannot handle
2487 symbolic evolution functions, ie. when initial conditions are
2488 parameters, because it uses lambda matrices of integers. */
2491 analyze_subscript_affine_affine (tree chrec_a
,
2493 conflict_function
**overlaps_a
,
2494 conflict_function
**overlaps_b
,
2495 tree
*last_conflicts
)
2497 unsigned nb_vars_a
, nb_vars_b
, dim
;
2498 HOST_WIDE_INT init_a
, init_b
, gamma
, gcd_alpha_beta
;
2499 lambda_matrix A
, U
, S
;
2500 struct obstack scratch_obstack
;
2502 if (eq_evolutions_p (chrec_a
, chrec_b
))
2504 /* The accessed index overlaps for each iteration in the
2506 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2507 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2508 *last_conflicts
= chrec_dont_know
;
2511 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2512 fprintf (dump_file
, "(analyze_subscript_affine_affine \n");
2514 /* For determining the initial intersection, we have to solve a
2515 Diophantine equation. This is the most time consuming part.
2517 For answering to the question: "Is there a dependence?" we have
2518 to prove that there exists a solution to the Diophantine
2519 equation, and that the solution is in the iteration domain,
2520 i.e. the solution is positive or zero, and that the solution
2521 happens before the upper bound loop.nb_iterations. Otherwise
2522 there is no dependence. This function outputs a description of
2523 the iterations that hold the intersections. */
2525 nb_vars_a
= nb_vars_in_chrec (chrec_a
);
2526 nb_vars_b
= nb_vars_in_chrec (chrec_b
);
2528 gcc_obstack_init (&scratch_obstack
);
2530 dim
= nb_vars_a
+ nb_vars_b
;
2531 U
= lambda_matrix_new (dim
, dim
, &scratch_obstack
);
2532 A
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2533 S
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2535 init_a
= int_cst_value (initialize_matrix_A (A
, chrec_a
, 0, 1));
2536 init_b
= int_cst_value (initialize_matrix_A (A
, chrec_b
, nb_vars_a
, -1));
2537 gamma
= init_b
- init_a
;
2539 /* Don't do all the hard work of solving the Diophantine equation
2540 when we already know the solution: for example,
2543 | gamma = 3 - 3 = 0.
2544 Then the first overlap occurs during the first iterations:
2545 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2549 if (nb_vars_a
== 1 && nb_vars_b
== 1)
2551 HOST_WIDE_INT step_a
, step_b
;
2552 HOST_WIDE_INT niter
, niter_a
, niter_b
;
2555 niter_a
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
2556 niter_b
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
2557 niter
= MIN (niter_a
, niter_b
);
2558 step_a
= int_cst_value (CHREC_RIGHT (chrec_a
));
2559 step_b
= int_cst_value (CHREC_RIGHT (chrec_b
));
2561 compute_overlap_steps_for_affine_univar (niter
, step_a
, step_b
,
2564 *overlaps_a
= conflict_fn (1, ova
);
2565 *overlaps_b
= conflict_fn (1, ovb
);
2568 else if (nb_vars_a
== 2 && nb_vars_b
== 1)
2569 compute_overlap_steps_for_affine_1_2
2570 (chrec_a
, chrec_b
, overlaps_a
, overlaps_b
, last_conflicts
);
2572 else if (nb_vars_a
== 1 && nb_vars_b
== 2)
2573 compute_overlap_steps_for_affine_1_2
2574 (chrec_b
, chrec_a
, overlaps_b
, overlaps_a
, last_conflicts
);
2578 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2579 fprintf (dump_file
, "affine-affine test failed: too many variables.\n");
2580 *overlaps_a
= conflict_fn_not_known ();
2581 *overlaps_b
= conflict_fn_not_known ();
2582 *last_conflicts
= chrec_dont_know
;
2584 goto end_analyze_subs_aa
;
2588 lambda_matrix_right_hermite (A
, dim
, 1, S
, U
);
2593 lambda_matrix_row_negate (U
, dim
, 0);
2595 gcd_alpha_beta
= S
[0][0];
2597 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2598 but that is a quite strange case. Instead of ICEing, answer
2600 if (gcd_alpha_beta
== 0)
2602 *overlaps_a
= conflict_fn_not_known ();
2603 *overlaps_b
= conflict_fn_not_known ();
2604 *last_conflicts
= chrec_dont_know
;
2605 goto end_analyze_subs_aa
;
2608 /* The classic "gcd-test". */
2609 if (!int_divides_p (gcd_alpha_beta
, gamma
))
2611 /* The "gcd-test" has determined that there is no integer
2612 solution, i.e. there is no dependence. */
2613 *overlaps_a
= conflict_fn_no_dependence ();
2614 *overlaps_b
= conflict_fn_no_dependence ();
2615 *last_conflicts
= integer_zero_node
;
2618 /* Both access functions are univariate. This includes SIV and MIV cases. */
2619 else if (nb_vars_a
== 1 && nb_vars_b
== 1)
2621 /* Both functions should have the same evolution sign. */
2622 if (((A
[0][0] > 0 && -A
[1][0] > 0)
2623 || (A
[0][0] < 0 && -A
[1][0] < 0)))
2625 /* The solutions are given by:
2627 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2630 For a given integer t. Using the following variables,
2632 | i0 = u11 * gamma / gcd_alpha_beta
2633 | j0 = u12 * gamma / gcd_alpha_beta
2640 | y0 = j0 + j1 * t. */
2641 HOST_WIDE_INT i0
, j0
, i1
, j1
;
2643 i0
= U
[0][0] * gamma
/ gcd_alpha_beta
;
2644 j0
= U
[0][1] * gamma
/ gcd_alpha_beta
;
2648 if ((i1
== 0 && i0
< 0)
2649 || (j1
== 0 && j0
< 0))
2651 /* There is no solution.
2652 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2653 falls in here, but for the moment we don't look at the
2654 upper bound of the iteration domain. */
2655 *overlaps_a
= conflict_fn_no_dependence ();
2656 *overlaps_b
= conflict_fn_no_dependence ();
2657 *last_conflicts
= integer_zero_node
;
2658 goto end_analyze_subs_aa
;
2661 if (i1
> 0 && j1
> 0)
2663 HOST_WIDE_INT niter_a
2664 = max_stmt_executions_int (get_chrec_loop (chrec_a
));
2665 HOST_WIDE_INT niter_b
2666 = max_stmt_executions_int (get_chrec_loop (chrec_b
));
2667 HOST_WIDE_INT niter
= MIN (niter_a
, niter_b
);
2669 /* (X0, Y0) is a solution of the Diophantine equation:
2670 "chrec_a (X0) = chrec_b (Y0)". */
2671 HOST_WIDE_INT tau1
= MAX (CEIL (-i0
, i1
),
2673 HOST_WIDE_INT x0
= i1
* tau1
+ i0
;
2674 HOST_WIDE_INT y0
= j1
* tau1
+ j0
;
2676 /* (X1, Y1) is the smallest positive solution of the eq
2677 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2678 first conflict occurs. */
2679 HOST_WIDE_INT min_multiple
= MIN (x0
/ i1
, y0
/ j1
);
2680 HOST_WIDE_INT x1
= x0
- i1
* min_multiple
;
2681 HOST_WIDE_INT y1
= y0
- j1
* min_multiple
;
2685 HOST_WIDE_INT tau2
= MIN (FLOOR_DIV (niter
- i0
, i1
),
2686 FLOOR_DIV (niter
- j0
, j1
));
2687 HOST_WIDE_INT last_conflict
= tau2
- (x1
- i0
)/i1
;
2689 /* If the overlap occurs outside of the bounds of the
2690 loop, there is no dependence. */
2691 if (x1
>= niter
|| y1
>= niter
)
2693 *overlaps_a
= conflict_fn_no_dependence ();
2694 *overlaps_b
= conflict_fn_no_dependence ();
2695 *last_conflicts
= integer_zero_node
;
2696 goto end_analyze_subs_aa
;
2699 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2702 *last_conflicts
= chrec_dont_know
;
2706 affine_fn_univar (build_int_cst (NULL_TREE
, x1
),
2708 build_int_cst (NULL_TREE
, i1
)));
2711 affine_fn_univar (build_int_cst (NULL_TREE
, y1
),
2713 build_int_cst (NULL_TREE
, j1
)));
2717 /* FIXME: For the moment, the upper bound of the
2718 iteration domain for i and j is not checked. */
2719 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2720 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2721 *overlaps_a
= conflict_fn_not_known ();
2722 *overlaps_b
= conflict_fn_not_known ();
2723 *last_conflicts
= chrec_dont_know
;
2728 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2729 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2730 *overlaps_a
= conflict_fn_not_known ();
2731 *overlaps_b
= conflict_fn_not_known ();
2732 *last_conflicts
= chrec_dont_know
;
2737 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2738 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2739 *overlaps_a
= conflict_fn_not_known ();
2740 *overlaps_b
= conflict_fn_not_known ();
2741 *last_conflicts
= chrec_dont_know
;
2744 end_analyze_subs_aa
:
2745 obstack_free (&scratch_obstack
, NULL
);
2746 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2748 fprintf (dump_file
, " (overlaps_a = ");
2749 dump_conflict_function (dump_file
, *overlaps_a
);
2750 fprintf (dump_file
, ")\n (overlaps_b = ");
2751 dump_conflict_function (dump_file
, *overlaps_b
);
2752 fprintf (dump_file
, "))\n");
2756 /* Returns true when analyze_subscript_affine_affine can be used for
2757 determining the dependence relation between chrec_a and chrec_b,
2758 that contain symbols. This function modifies chrec_a and chrec_b
2759 such that the analysis result is the same, and such that they don't
2760 contain symbols, and then can safely be passed to the analyzer.
2762 Example: The analysis of the following tuples of evolutions produce
2763 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2766 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2767 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2771 can_use_analyze_subscript_affine_affine (tree
*chrec_a
, tree
*chrec_b
)
2773 tree diff
, type
, left_a
, left_b
, right_b
;
2775 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a
))
2776 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b
)))
2777 /* FIXME: For the moment not handled. Might be refined later. */
2780 type
= chrec_type (*chrec_a
);
2781 left_a
= CHREC_LEFT (*chrec_a
);
2782 left_b
= chrec_convert (type
, CHREC_LEFT (*chrec_b
), NULL
);
2783 diff
= chrec_fold_minus (type
, left_a
, left_b
);
2785 if (!evolution_function_is_constant_p (diff
))
2788 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2789 fprintf (dump_file
, "can_use_subscript_aff_aff_for_symbolic \n");
2791 *chrec_a
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_a
),
2792 diff
, CHREC_RIGHT (*chrec_a
));
2793 right_b
= chrec_convert (type
, CHREC_RIGHT (*chrec_b
), NULL
);
2794 *chrec_b
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_b
),
2795 build_int_cst (type
, 0),
2800 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2801 *OVERLAPS_B are initialized to the functions that describe the
2802 relation between the elements accessed twice by CHREC_A and
2803 CHREC_B. For k >= 0, the following property is verified:
2805 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2808 analyze_siv_subscript (tree chrec_a
,
2810 conflict_function
**overlaps_a
,
2811 conflict_function
**overlaps_b
,
2812 tree
*last_conflicts
,
2815 dependence_stats
.num_siv
++;
2817 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2818 fprintf (dump_file
, "(analyze_siv_subscript \n");
2820 if (evolution_function_is_constant_p (chrec_a
)
2821 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2822 analyze_siv_subscript_cst_affine (chrec_a
, chrec_b
,
2823 overlaps_a
, overlaps_b
, last_conflicts
);
2825 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2826 && evolution_function_is_constant_p (chrec_b
))
2827 analyze_siv_subscript_cst_affine (chrec_b
, chrec_a
,
2828 overlaps_b
, overlaps_a
, last_conflicts
);
2830 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2831 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2833 if (!chrec_contains_symbols (chrec_a
)
2834 && !chrec_contains_symbols (chrec_b
))
2836 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2837 overlaps_a
, overlaps_b
,
2840 if (CF_NOT_KNOWN_P (*overlaps_a
)
2841 || CF_NOT_KNOWN_P (*overlaps_b
))
2842 dependence_stats
.num_siv_unimplemented
++;
2843 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2844 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2845 dependence_stats
.num_siv_independent
++;
2847 dependence_stats
.num_siv_dependent
++;
2849 else if (can_use_analyze_subscript_affine_affine (&chrec_a
,
2852 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2853 overlaps_a
, overlaps_b
,
2856 if (CF_NOT_KNOWN_P (*overlaps_a
)
2857 || CF_NOT_KNOWN_P (*overlaps_b
))
2858 dependence_stats
.num_siv_unimplemented
++;
2859 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2860 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2861 dependence_stats
.num_siv_independent
++;
2863 dependence_stats
.num_siv_dependent
++;
2866 goto siv_subscript_dontknow
;
2871 siv_subscript_dontknow
:;
2872 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2873 fprintf (dump_file
, " siv test failed: unimplemented");
2874 *overlaps_a
= conflict_fn_not_known ();
2875 *overlaps_b
= conflict_fn_not_known ();
2876 *last_conflicts
= chrec_dont_know
;
2877 dependence_stats
.num_siv_unimplemented
++;
2880 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2881 fprintf (dump_file
, ")\n");
2884 /* Returns false if we can prove that the greatest common divisor of the steps
2885 of CHREC does not divide CST, false otherwise. */
2888 gcd_of_steps_may_divide_p (const_tree chrec
, const_tree cst
)
2890 HOST_WIDE_INT cd
= 0, val
;
2893 if (!tree_fits_shwi_p (cst
))
2895 val
= tree_to_shwi (cst
);
2897 while (TREE_CODE (chrec
) == POLYNOMIAL_CHREC
)
2899 step
= CHREC_RIGHT (chrec
);
2900 if (!tree_fits_shwi_p (step
))
2902 cd
= gcd (cd
, tree_to_shwi (step
));
2903 chrec
= CHREC_LEFT (chrec
);
2906 return val
% cd
== 0;
2909 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2910 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2911 functions that describe the relation between the elements accessed
2912 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2915 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2918 analyze_miv_subscript (tree chrec_a
,
2920 conflict_function
**overlaps_a
,
2921 conflict_function
**overlaps_b
,
2922 tree
*last_conflicts
,
2923 struct loop
*loop_nest
)
2925 tree type
, difference
;
2927 dependence_stats
.num_miv
++;
2928 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2929 fprintf (dump_file
, "(analyze_miv_subscript \n");
2931 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
2932 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
2933 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
2934 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
2936 if (eq_evolutions_p (chrec_a
, chrec_b
))
2938 /* Access functions are the same: all the elements are accessed
2939 in the same order. */
2940 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2941 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2942 *last_conflicts
= max_stmt_executions_tree (get_chrec_loop (chrec_a
));
2943 dependence_stats
.num_miv_dependent
++;
2946 else if (evolution_function_is_constant_p (difference
)
2947 /* For the moment, the following is verified:
2948 evolution_function_is_affine_multivariate_p (chrec_a,
2950 && !gcd_of_steps_may_divide_p (chrec_a
, difference
))
2952 /* testsuite/.../ssa-chrec-33.c
2953 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2955 The difference is 1, and all the evolution steps are multiples
2956 of 2, consequently there are no overlapping elements. */
2957 *overlaps_a
= conflict_fn_no_dependence ();
2958 *overlaps_b
= conflict_fn_no_dependence ();
2959 *last_conflicts
= integer_zero_node
;
2960 dependence_stats
.num_miv_independent
++;
2963 else if (evolution_function_is_affine_multivariate_p (chrec_a
, loop_nest
->num
)
2964 && !chrec_contains_symbols (chrec_a
)
2965 && evolution_function_is_affine_multivariate_p (chrec_b
, loop_nest
->num
)
2966 && !chrec_contains_symbols (chrec_b
))
2968 /* testsuite/.../ssa-chrec-35.c
2969 {0, +, 1}_2 vs. {0, +, 1}_3
2970 the overlapping elements are respectively located at iterations:
2971 {0, +, 1}_x and {0, +, 1}_x,
2972 in other words, we have the equality:
2973 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2976 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2977 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2979 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2980 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2982 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2983 overlaps_a
, overlaps_b
, last_conflicts
);
2985 if (CF_NOT_KNOWN_P (*overlaps_a
)
2986 || CF_NOT_KNOWN_P (*overlaps_b
))
2987 dependence_stats
.num_miv_unimplemented
++;
2988 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2989 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2990 dependence_stats
.num_miv_independent
++;
2992 dependence_stats
.num_miv_dependent
++;
2997 /* When the analysis is too difficult, answer "don't know". */
2998 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2999 fprintf (dump_file
, "analyze_miv_subscript test failed: unimplemented.\n");
3001 *overlaps_a
= conflict_fn_not_known ();
3002 *overlaps_b
= conflict_fn_not_known ();
3003 *last_conflicts
= chrec_dont_know
;
3004 dependence_stats
.num_miv_unimplemented
++;
3007 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3008 fprintf (dump_file
, ")\n");
3011 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
3012 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
3013 OVERLAP_ITERATIONS_B are initialized with two functions that
3014 describe the iterations that contain conflicting elements.
3016 Remark: For an integer k >= 0, the following equality is true:
3018 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3022 analyze_overlapping_iterations (tree chrec_a
,
3024 conflict_function
**overlap_iterations_a
,
3025 conflict_function
**overlap_iterations_b
,
3026 tree
*last_conflicts
, struct loop
*loop_nest
)
3028 unsigned int lnn
= loop_nest
->num
;
3030 dependence_stats
.num_subscript_tests
++;
3032 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3034 fprintf (dump_file
, "(analyze_overlapping_iterations \n");
3035 fprintf (dump_file
, " (chrec_a = ");
3036 print_generic_expr (dump_file
, chrec_a
, 0);
3037 fprintf (dump_file
, ")\n (chrec_b = ");
3038 print_generic_expr (dump_file
, chrec_b
, 0);
3039 fprintf (dump_file
, ")\n");
3042 if (chrec_a
== NULL_TREE
3043 || chrec_b
== NULL_TREE
3044 || chrec_contains_undetermined (chrec_a
)
3045 || chrec_contains_undetermined (chrec_b
))
3047 dependence_stats
.num_subscript_undetermined
++;
3049 *overlap_iterations_a
= conflict_fn_not_known ();
3050 *overlap_iterations_b
= conflict_fn_not_known ();
3053 /* If they are the same chrec, and are affine, they overlap
3054 on every iteration. */
3055 else if (eq_evolutions_p (chrec_a
, chrec_b
)
3056 && (evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
3057 || operand_equal_p (chrec_a
, chrec_b
, 0)))
3059 dependence_stats
.num_same_subscript_function
++;
3060 *overlap_iterations_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3061 *overlap_iterations_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3062 *last_conflicts
= chrec_dont_know
;
3065 /* If they aren't the same, and aren't affine, we can't do anything
3067 else if ((chrec_contains_symbols (chrec_a
)
3068 || chrec_contains_symbols (chrec_b
))
3069 && (!evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
3070 || !evolution_function_is_affine_multivariate_p (chrec_b
, lnn
)))
3072 dependence_stats
.num_subscript_undetermined
++;
3073 *overlap_iterations_a
= conflict_fn_not_known ();
3074 *overlap_iterations_b
= conflict_fn_not_known ();
3077 else if (ziv_subscript_p (chrec_a
, chrec_b
))
3078 analyze_ziv_subscript (chrec_a
, chrec_b
,
3079 overlap_iterations_a
, overlap_iterations_b
,
3082 else if (siv_subscript_p (chrec_a
, chrec_b
))
3083 analyze_siv_subscript (chrec_a
, chrec_b
,
3084 overlap_iterations_a
, overlap_iterations_b
,
3085 last_conflicts
, lnn
);
3088 analyze_miv_subscript (chrec_a
, chrec_b
,
3089 overlap_iterations_a
, overlap_iterations_b
,
3090 last_conflicts
, loop_nest
);
3092 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3094 fprintf (dump_file
, " (overlap_iterations_a = ");
3095 dump_conflict_function (dump_file
, *overlap_iterations_a
);
3096 fprintf (dump_file
, ")\n (overlap_iterations_b = ");
3097 dump_conflict_function (dump_file
, *overlap_iterations_b
);
3098 fprintf (dump_file
, "))\n");
3102 /* Helper function for uniquely inserting distance vectors. */
3105 save_dist_v (struct data_dependence_relation
*ddr
, lambda_vector dist_v
)
3110 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, v
)
3111 if (lambda_vector_equal (v
, dist_v
, DDR_NB_LOOPS (ddr
)))
3114 DDR_DIST_VECTS (ddr
).safe_push (dist_v
);
3117 /* Helper function for uniquely inserting direction vectors. */
3120 save_dir_v (struct data_dependence_relation
*ddr
, lambda_vector dir_v
)
3125 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), i
, v
)
3126 if (lambda_vector_equal (v
, dir_v
, DDR_NB_LOOPS (ddr
)))
3129 DDR_DIR_VECTS (ddr
).safe_push (dir_v
);
3132 /* Add a distance of 1 on all the loops outer than INDEX. If we
3133 haven't yet determined a distance for this outer loop, push a new
3134 distance vector composed of the previous distance, and a distance
3135 of 1 for this outer loop. Example:
3143 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3144 save (0, 1), then we have to save (1, 0). */
3147 add_outer_distances (struct data_dependence_relation
*ddr
,
3148 lambda_vector dist_v
, int index
)
3150 /* For each outer loop where init_v is not set, the accesses are
3151 in dependence of distance 1 in the loop. */
3152 while (--index
>= 0)
3154 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3155 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3157 save_dist_v (ddr
, save_v
);
3161 /* Return false when fail to represent the data dependence as a
3162 distance vector. INIT_B is set to true when a component has been
3163 added to the distance vector DIST_V. INDEX_CARRY is then set to
3164 the index in DIST_V that carries the dependence. */
3167 build_classic_dist_vector_1 (struct data_dependence_relation
*ddr
,
3168 struct data_reference
*ddr_a
,
3169 struct data_reference
*ddr_b
,
3170 lambda_vector dist_v
, bool *init_b
,
3174 lambda_vector init_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3176 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3178 tree access_fn_a
, access_fn_b
;
3179 struct subscript
*subscript
= DDR_SUBSCRIPT (ddr
, i
);
3181 if (chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
3183 non_affine_dependence_relation (ddr
);
3187 access_fn_a
= DR_ACCESS_FN (ddr_a
, i
);
3188 access_fn_b
= DR_ACCESS_FN (ddr_b
, i
);
3190 if (TREE_CODE (access_fn_a
) == POLYNOMIAL_CHREC
3191 && TREE_CODE (access_fn_b
) == POLYNOMIAL_CHREC
)
3194 int var_a
= CHREC_VARIABLE (access_fn_a
);
3195 int var_b
= CHREC_VARIABLE (access_fn_b
);
3198 || chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
3200 non_affine_dependence_relation (ddr
);
3204 dist
= int_cst_value (SUB_DISTANCE (subscript
));
3205 index
= index_in_loop_nest (var_a
, DDR_LOOP_NEST (ddr
));
3206 *index_carry
= MIN (index
, *index_carry
);
3208 /* This is the subscript coupling test. If we have already
3209 recorded a distance for this loop (a distance coming from
3210 another subscript), it should be the same. For example,
3211 in the following code, there is no dependence:
3218 if (init_v
[index
] != 0 && dist_v
[index
] != dist
)
3220 finalize_ddr_dependent (ddr
, chrec_known
);
3224 dist_v
[index
] = dist
;
3228 else if (!operand_equal_p (access_fn_a
, access_fn_b
, 0))
3230 /* This can be for example an affine vs. constant dependence
3231 (T[i] vs. T[3]) that is not an affine dependence and is
3232 not representable as a distance vector. */
3233 non_affine_dependence_relation (ddr
);
3241 /* Return true when the DDR contains only constant access functions. */
3244 constant_access_functions (const struct data_dependence_relation
*ddr
)
3248 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3249 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr
), i
))
3250 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr
), i
)))
3256 /* Helper function for the case where DDR_A and DDR_B are the same
3257 multivariate access function with a constant step. For an example
3261 add_multivariate_self_dist (struct data_dependence_relation
*ddr
, tree c_2
)
3264 tree c_1
= CHREC_LEFT (c_2
);
3265 tree c_0
= CHREC_LEFT (c_1
);
3266 lambda_vector dist_v
;
3269 /* Polynomials with more than 2 variables are not handled yet. When
3270 the evolution steps are parameters, it is not possible to
3271 represent the dependence using classical distance vectors. */
3272 if (TREE_CODE (c_0
) != INTEGER_CST
3273 || TREE_CODE (CHREC_RIGHT (c_1
)) != INTEGER_CST
3274 || TREE_CODE (CHREC_RIGHT (c_2
)) != INTEGER_CST
)
3276 DDR_AFFINE_P (ddr
) = false;
3280 x_2
= index_in_loop_nest (CHREC_VARIABLE (c_2
), DDR_LOOP_NEST (ddr
));
3281 x_1
= index_in_loop_nest (CHREC_VARIABLE (c_1
), DDR_LOOP_NEST (ddr
));
3283 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3284 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3285 v1
= int_cst_value (CHREC_RIGHT (c_1
));
3286 v2
= int_cst_value (CHREC_RIGHT (c_2
));
3299 save_dist_v (ddr
, dist_v
);
3301 add_outer_distances (ddr
, dist_v
, x_1
);
3304 /* Helper function for the case where DDR_A and DDR_B are the same
3305 access functions. */
3308 add_other_self_distances (struct data_dependence_relation
*ddr
)
3310 lambda_vector dist_v
;
3312 int index_carry
= DDR_NB_LOOPS (ddr
);
3314 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3316 tree access_fun
= DR_ACCESS_FN (DDR_A (ddr
), i
);
3318 if (TREE_CODE (access_fun
) == POLYNOMIAL_CHREC
)
3320 if (!evolution_function_is_univariate_p (access_fun
))
3322 if (DDR_NUM_SUBSCRIPTS (ddr
) != 1)
3324 DDR_ARE_DEPENDENT (ddr
) = chrec_dont_know
;
3328 access_fun
= DR_ACCESS_FN (DDR_A (ddr
), 0);
3330 if (TREE_CODE (CHREC_LEFT (access_fun
)) == POLYNOMIAL_CHREC
)
3331 add_multivariate_self_dist (ddr
, access_fun
);
3333 /* The evolution step is not constant: it varies in
3334 the outer loop, so this cannot be represented by a
3335 distance vector. For example in pr34635.c the
3336 evolution is {0, +, {0, +, 4}_1}_2. */
3337 DDR_AFFINE_P (ddr
) = false;
3342 index_carry
= MIN (index_carry
,
3343 index_in_loop_nest (CHREC_VARIABLE (access_fun
),
3344 DDR_LOOP_NEST (ddr
)));
3348 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3349 add_outer_distances (ddr
, dist_v
, index_carry
);
3353 insert_innermost_unit_dist_vector (struct data_dependence_relation
*ddr
)
3355 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3357 dist_v
[DDR_INNER_LOOP (ddr
)] = 1;
3358 save_dist_v (ddr
, dist_v
);
3361 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3362 is the case for example when access functions are the same and
3363 equal to a constant, as in:
3370 in which case the distance vectors are (0) and (1). */
3373 add_distance_for_zero_overlaps (struct data_dependence_relation
*ddr
)
3377 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3379 subscript_p sub
= DDR_SUBSCRIPT (ddr
, i
);
3380 conflict_function
*ca
= SUB_CONFLICTS_IN_A (sub
);
3381 conflict_function
*cb
= SUB_CONFLICTS_IN_B (sub
);
3383 for (j
= 0; j
< ca
->n
; j
++)
3384 if (affine_function_zero_p (ca
->fns
[j
]))
3386 insert_innermost_unit_dist_vector (ddr
);
3390 for (j
= 0; j
< cb
->n
; j
++)
3391 if (affine_function_zero_p (cb
->fns
[j
]))
3393 insert_innermost_unit_dist_vector (ddr
);
3399 /* Compute the classic per loop distance vector. DDR is the data
3400 dependence relation to build a vector from. Return false when fail
3401 to represent the data dependence as a distance vector. */
3404 build_classic_dist_vector (struct data_dependence_relation
*ddr
,
3405 struct loop
*loop_nest
)
3407 bool init_b
= false;
3408 int index_carry
= DDR_NB_LOOPS (ddr
);
3409 lambda_vector dist_v
;
3411 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
3414 if (same_access_functions (ddr
))
3416 /* Save the 0 vector. */
3417 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3418 save_dist_v (ddr
, dist_v
);
3420 if (constant_access_functions (ddr
))
3421 add_distance_for_zero_overlaps (ddr
);
3423 if (DDR_NB_LOOPS (ddr
) > 1)
3424 add_other_self_distances (ddr
);
3429 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3430 if (!build_classic_dist_vector_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
),
3431 dist_v
, &init_b
, &index_carry
))
3434 /* Save the distance vector if we initialized one. */
3437 /* Verify a basic constraint: classic distance vectors should
3438 always be lexicographically positive.
3440 Data references are collected in the order of execution of
3441 the program, thus for the following loop
3443 | for (i = 1; i < 100; i++)
3444 | for (j = 1; j < 100; j++)
3446 | t = T[j+1][i-1]; // A
3447 | T[j][i] = t + 2; // B
3450 references are collected following the direction of the wind:
3451 A then B. The data dependence tests are performed also
3452 following this order, such that we're looking at the distance
3453 separating the elements accessed by A from the elements later
3454 accessed by B. But in this example, the distance returned by
3455 test_dep (A, B) is lexicographically negative (-1, 1), that
3456 means that the access A occurs later than B with respect to
3457 the outer loop, ie. we're actually looking upwind. In this
3458 case we solve test_dep (B, A) looking downwind to the
3459 lexicographically positive solution, that returns the
3460 distance vector (1, -1). */
3461 if (!lambda_vector_lexico_pos (dist_v
, DDR_NB_LOOPS (ddr
)))
3463 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3464 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3467 compute_subscript_distance (ddr
);
3468 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3469 save_v
, &init_b
, &index_carry
))
3471 save_dist_v (ddr
, save_v
);
3472 DDR_REVERSED_P (ddr
) = true;
3474 /* In this case there is a dependence forward for all the
3477 | for (k = 1; k < 100; k++)
3478 | for (i = 1; i < 100; i++)
3479 | for (j = 1; j < 100; j++)
3481 | t = T[j+1][i-1]; // A
3482 | T[j][i] = t + 2; // B
3490 if (DDR_NB_LOOPS (ddr
) > 1)
3492 add_outer_distances (ddr
, save_v
, index_carry
);
3493 add_outer_distances (ddr
, dist_v
, index_carry
);
3498 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3499 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3501 if (DDR_NB_LOOPS (ddr
) > 1)
3503 lambda_vector opposite_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3505 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
),
3506 DDR_A (ddr
), loop_nest
))
3508 compute_subscript_distance (ddr
);
3509 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3510 opposite_v
, &init_b
,
3514 save_dist_v (ddr
, save_v
);
3515 add_outer_distances (ddr
, dist_v
, index_carry
);
3516 add_outer_distances (ddr
, opposite_v
, index_carry
);
3519 save_dist_v (ddr
, save_v
);
3524 /* There is a distance of 1 on all the outer loops: Example:
3525 there is a dependence of distance 1 on loop_1 for the array A.
3531 add_outer_distances (ddr
, dist_v
,
3532 lambda_vector_first_nz (dist_v
,
3533 DDR_NB_LOOPS (ddr
), 0));
3536 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3540 fprintf (dump_file
, "(build_classic_dist_vector\n");
3541 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3543 fprintf (dump_file
, " dist_vector = (");
3544 print_lambda_vector (dump_file
, DDR_DIST_VECT (ddr
, i
),
3545 DDR_NB_LOOPS (ddr
));
3546 fprintf (dump_file
, " )\n");
3548 fprintf (dump_file
, ")\n");
3554 /* Return the direction for a given distance.
3555 FIXME: Computing dir this way is suboptimal, since dir can catch
3556 cases that dist is unable to represent. */
3558 static inline enum data_dependence_direction
3559 dir_from_dist (int dist
)
3562 return dir_positive
;
3564 return dir_negative
;
3569 /* Compute the classic per loop direction vector. DDR is the data
3570 dependence relation to build a vector from. */
3573 build_classic_dir_vector (struct data_dependence_relation
*ddr
)
3576 lambda_vector dist_v
;
3578 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
3580 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3582 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3583 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3585 save_dir_v (ddr
, dir_v
);
3589 /* Helper function. Returns true when there is a dependence between
3590 data references DRA and DRB. */
3593 subscript_dependence_tester_1 (struct data_dependence_relation
*ddr
,
3594 struct data_reference
*dra
,
3595 struct data_reference
*drb
,
3596 struct loop
*loop_nest
)
3599 tree last_conflicts
;
3600 struct subscript
*subscript
;
3601 tree res
= NULL_TREE
;
3603 for (i
= 0; DDR_SUBSCRIPTS (ddr
).iterate (i
, &subscript
); i
++)
3605 conflict_function
*overlaps_a
, *overlaps_b
;
3607 analyze_overlapping_iterations (DR_ACCESS_FN (dra
, i
),
3608 DR_ACCESS_FN (drb
, i
),
3609 &overlaps_a
, &overlaps_b
,
3610 &last_conflicts
, loop_nest
);
3612 if (SUB_CONFLICTS_IN_A (subscript
))
3613 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
3614 if (SUB_CONFLICTS_IN_B (subscript
))
3615 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
3617 SUB_CONFLICTS_IN_A (subscript
) = overlaps_a
;
3618 SUB_CONFLICTS_IN_B (subscript
) = overlaps_b
;
3619 SUB_LAST_CONFLICT (subscript
) = last_conflicts
;
3621 /* If there is any undetermined conflict function we have to
3622 give a conservative answer in case we cannot prove that
3623 no dependence exists when analyzing another subscript. */
3624 if (CF_NOT_KNOWN_P (overlaps_a
)
3625 || CF_NOT_KNOWN_P (overlaps_b
))
3627 res
= chrec_dont_know
;
3631 /* When there is a subscript with no dependence we can stop. */
3632 else if (CF_NO_DEPENDENCE_P (overlaps_a
)
3633 || CF_NO_DEPENDENCE_P (overlaps_b
))
3640 if (res
== NULL_TREE
)
3643 if (res
== chrec_known
)
3644 dependence_stats
.num_dependence_independent
++;
3646 dependence_stats
.num_dependence_undetermined
++;
3647 finalize_ddr_dependent (ddr
, res
);
3651 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3654 subscript_dependence_tester (struct data_dependence_relation
*ddr
,
3655 struct loop
*loop_nest
)
3657 if (subscript_dependence_tester_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
), loop_nest
))
3658 dependence_stats
.num_dependence_dependent
++;
3660 compute_subscript_distance (ddr
);
3661 if (build_classic_dist_vector (ddr
, loop_nest
))
3662 build_classic_dir_vector (ddr
);
3665 /* Returns true when all the access functions of A are affine or
3666 constant with respect to LOOP_NEST. */
3669 access_functions_are_affine_or_constant_p (const struct data_reference
*a
,
3670 const struct loop
*loop_nest
)
3673 vec
<tree
> fns
= DR_ACCESS_FNS (a
);
3676 FOR_EACH_VEC_ELT (fns
, i
, t
)
3677 if (!evolution_function_is_invariant_p (t
, loop_nest
->num
)
3678 && !evolution_function_is_affine_multivariate_p (t
, loop_nest
->num
))
3684 /* This computes the affine dependence relation between A and B with
3685 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3686 independence between two accesses, while CHREC_DONT_KNOW is used
3687 for representing the unknown relation.
3689 Note that it is possible to stop the computation of the dependence
3690 relation the first time we detect a CHREC_KNOWN element for a given
3694 compute_affine_dependence (struct data_dependence_relation
*ddr
,
3695 struct loop
*loop_nest
)
3697 struct data_reference
*dra
= DDR_A (ddr
);
3698 struct data_reference
*drb
= DDR_B (ddr
);
3700 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3702 fprintf (dump_file
, "(compute_affine_dependence\n");
3703 fprintf (dump_file
, " stmt_a: ");
3704 print_gimple_stmt (dump_file
, DR_STMT (dra
), 0, TDF_SLIM
);
3705 fprintf (dump_file
, " stmt_b: ");
3706 print_gimple_stmt (dump_file
, DR_STMT (drb
), 0, TDF_SLIM
);
3709 /* Analyze only when the dependence relation is not yet known. */
3710 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
3712 dependence_stats
.num_dependence_tests
++;
3714 if (access_functions_are_affine_or_constant_p (dra
, loop_nest
)
3715 && access_functions_are_affine_or_constant_p (drb
, loop_nest
))
3716 subscript_dependence_tester (ddr
, loop_nest
);
3718 /* As a last case, if the dependence cannot be determined, or if
3719 the dependence is considered too difficult to determine, answer
3723 dependence_stats
.num_dependence_undetermined
++;
3725 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3727 fprintf (dump_file
, "Data ref a:\n");
3728 dump_data_reference (dump_file
, dra
);
3729 fprintf (dump_file
, "Data ref b:\n");
3730 dump_data_reference (dump_file
, drb
);
3731 fprintf (dump_file
, "affine dependence test not usable: access function not affine or constant.\n");
3733 finalize_ddr_dependent (ddr
, chrec_dont_know
);
3737 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3739 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
3740 fprintf (dump_file
, ") -> no dependence\n");
3741 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
3742 fprintf (dump_file
, ") -> dependence analysis failed\n");
3744 fprintf (dump_file
, ")\n");
3748 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3749 the data references in DATAREFS, in the LOOP_NEST. When
3750 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3751 relations. Return true when successful, i.e. data references number
3752 is small enough to be handled. */
3755 compute_all_dependences (vec
<data_reference_p
> datarefs
,
3756 vec
<ddr_p
> *dependence_relations
,
3757 vec
<loop_p
> loop_nest
,
3758 bool compute_self_and_rr
)
3760 struct data_dependence_relation
*ddr
;
3761 struct data_reference
*a
, *b
;
3764 if ((int) datarefs
.length ()
3765 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS
))
3767 struct data_dependence_relation
*ddr
;
3769 /* Insert a single relation into dependence_relations:
3771 ddr
= initialize_data_dependence_relation (NULL
, NULL
, loop_nest
);
3772 dependence_relations
->safe_push (ddr
);
3776 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
3777 for (j
= i
+ 1; datarefs
.iterate (j
, &b
); j
++)
3778 if (DR_IS_WRITE (a
) || DR_IS_WRITE (b
) || compute_self_and_rr
)
3780 ddr
= initialize_data_dependence_relation (a
, b
, loop_nest
);
3781 dependence_relations
->safe_push (ddr
);
3782 if (loop_nest
.exists ())
3783 compute_affine_dependence (ddr
, loop_nest
[0]);
3786 if (compute_self_and_rr
)
3787 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
3789 ddr
= initialize_data_dependence_relation (a
, a
, loop_nest
);
3790 dependence_relations
->safe_push (ddr
);
3791 if (loop_nest
.exists ())
3792 compute_affine_dependence (ddr
, loop_nest
[0]);
3798 /* Describes a location of a memory reference. */
3802 /* The memory reference. */
3805 /* True if the memory reference is read. */
3810 /* Stores the locations of memory references in STMT to REFERENCES. Returns
3811 true if STMT clobbers memory, false otherwise. */
3814 get_references_in_stmt (gimple
*stmt
, vec
<data_ref_loc
, va_heap
> *references
)
3816 bool clobbers_memory
= false;
3819 enum gimple_code stmt_code
= gimple_code (stmt
);
3821 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
3822 As we cannot model data-references to not spelled out
3823 accesses give up if they may occur. */
3824 if (stmt_code
== GIMPLE_CALL
3825 && !(gimple_call_flags (stmt
) & ECF_CONST
))
3827 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
3828 if (gimple_call_internal_p (stmt
))
3829 switch (gimple_call_internal_fn (stmt
))
3831 case IFN_GOMP_SIMD_LANE
:
3833 struct loop
*loop
= gimple_bb (stmt
)->loop_father
;
3834 tree uid
= gimple_call_arg (stmt
, 0);
3835 gcc_assert (TREE_CODE (uid
) == SSA_NAME
);
3837 || loop
->simduid
!= SSA_NAME_VAR (uid
))
3838 clobbers_memory
= true;
3842 case IFN_MASK_STORE
:
3845 clobbers_memory
= true;
3849 clobbers_memory
= true;
3851 else if (stmt_code
== GIMPLE_ASM
3852 && (gimple_asm_volatile_p (as_a
<gasm
*> (stmt
))
3853 || gimple_vuse (stmt
)))
3854 clobbers_memory
= true;
3856 if (!gimple_vuse (stmt
))
3857 return clobbers_memory
;
3859 if (stmt_code
== GIMPLE_ASSIGN
)
3862 op0
= gimple_assign_lhs (stmt
);
3863 op1
= gimple_assign_rhs1 (stmt
);
3866 || (REFERENCE_CLASS_P (op1
)
3867 && (base
= get_base_address (op1
))
3868 && TREE_CODE (base
) != SSA_NAME
))
3872 references
->safe_push (ref
);
3875 else if (stmt_code
== GIMPLE_CALL
)
3879 ref
.is_read
= false;
3880 if (gimple_call_internal_p (stmt
))
3881 switch (gimple_call_internal_fn (stmt
))
3884 if (gimple_call_lhs (stmt
) == NULL_TREE
)
3887 case IFN_MASK_STORE
:
3888 ref
.ref
= fold_build2 (MEM_REF
,
3890 ? TREE_TYPE (gimple_call_lhs (stmt
))
3891 : TREE_TYPE (gimple_call_arg (stmt
, 3)),
3892 gimple_call_arg (stmt
, 0),
3893 gimple_call_arg (stmt
, 1));
3894 references
->safe_push (ref
);
3900 op0
= gimple_call_lhs (stmt
);
3901 n
= gimple_call_num_args (stmt
);
3902 for (i
= 0; i
< n
; i
++)
3904 op1
= gimple_call_arg (stmt
, i
);
3907 || (REFERENCE_CLASS_P (op1
) && get_base_address (op1
)))
3911 references
->safe_push (ref
);
3916 return clobbers_memory
;
3920 || (REFERENCE_CLASS_P (op0
) && get_base_address (op0
))))
3923 ref
.is_read
= false;
3924 references
->safe_push (ref
);
3926 return clobbers_memory
;
3930 /* Returns true if the loop-nest has any data reference. */
3933 loop_nest_has_data_refs (loop_p loop
)
3935 basic_block
*bbs
= get_loop_body (loop
);
3936 vec
<data_ref_loc
> references
;
3937 references
.create (3);
3939 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
3941 basic_block bb
= bbs
[i
];
3942 gimple_stmt_iterator bsi
;
3944 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
3946 gimple
*stmt
= gsi_stmt (bsi
);
3947 get_references_in_stmt (stmt
, &references
);
3948 if (references
.length ())
3951 references
.release ();
3957 references
.release ();
3964 if (loop_nest_has_data_refs (loop
))
3972 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
3973 reference, returns false, otherwise returns true. NEST is the outermost
3974 loop of the loop nest in which the references should be analyzed. */
3977 find_data_references_in_stmt (struct loop
*nest
, gimple
*stmt
,
3978 vec
<data_reference_p
> *datarefs
)
3981 auto_vec
<data_ref_loc
, 2> references
;
3984 data_reference_p dr
;
3986 if (get_references_in_stmt (stmt
, &references
))
3989 FOR_EACH_VEC_ELT (references
, i
, ref
)
3991 dr
= create_data_ref (nest
, loop_containing_stmt (stmt
),
3992 ref
->ref
, stmt
, ref
->is_read
);
3993 gcc_assert (dr
!= NULL
);
3994 datarefs
->safe_push (dr
);
3996 references
.release ();
4000 /* Stores the data references in STMT to DATAREFS. If there is an
4001 unanalyzable reference, returns false, otherwise returns true.
4002 NEST is the outermost loop of the loop nest in which the references
4003 should be instantiated, LOOP is the loop in which the references
4004 should be analyzed. */
4007 graphite_find_data_references_in_stmt (loop_p nest
, loop_p loop
, gimple
*stmt
,
4008 vec
<data_reference_p
> *datarefs
)
4011 auto_vec
<data_ref_loc
, 2> references
;
4014 data_reference_p dr
;
4016 if (get_references_in_stmt (stmt
, &references
))
4019 FOR_EACH_VEC_ELT (references
, i
, ref
)
4021 dr
= create_data_ref (nest
, loop
, ref
->ref
, stmt
, ref
->is_read
);
4022 gcc_assert (dr
!= NULL
);
4023 datarefs
->safe_push (dr
);
4026 references
.release ();
4030 /* Search the data references in LOOP, and record the information into
4031 DATAREFS. Returns chrec_dont_know when failing to analyze a
4032 difficult case, returns NULL_TREE otherwise. */
4035 find_data_references_in_bb (struct loop
*loop
, basic_block bb
,
4036 vec
<data_reference_p
> *datarefs
)
4038 gimple_stmt_iterator bsi
;
4040 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4042 gimple
*stmt
= gsi_stmt (bsi
);
4044 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
4046 struct data_reference
*res
;
4047 res
= XCNEW (struct data_reference
);
4048 datarefs
->safe_push (res
);
4050 return chrec_dont_know
;
4057 /* Search the data references in LOOP, and record the information into
4058 DATAREFS. Returns chrec_dont_know when failing to analyze a
4059 difficult case, returns NULL_TREE otherwise.
4061 TODO: This function should be made smarter so that it can handle address
4062 arithmetic as if they were array accesses, etc. */
4065 find_data_references_in_loop (struct loop
*loop
,
4066 vec
<data_reference_p
> *datarefs
)
4068 basic_block bb
, *bbs
;
4071 bbs
= get_loop_body_in_dom_order (loop
);
4073 for (i
= 0; i
< loop
->num_nodes
; i
++)
4077 if (find_data_references_in_bb (loop
, bb
, datarefs
) == chrec_dont_know
)
4080 return chrec_dont_know
;
4088 /* Recursive helper function. */
4091 find_loop_nest_1 (struct loop
*loop
, vec
<loop_p
> *loop_nest
)
4093 /* Inner loops of the nest should not contain siblings. Example:
4094 when there are two consecutive loops,
4105 the dependence relation cannot be captured by the distance
4110 loop_nest
->safe_push (loop
);
4112 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4116 /* Return false when the LOOP is not well nested. Otherwise return
4117 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4118 contain the loops from the outermost to the innermost, as they will
4119 appear in the classic distance vector. */
4122 find_loop_nest (struct loop
*loop
, vec
<loop_p
> *loop_nest
)
4124 loop_nest
->safe_push (loop
);
4126 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4130 /* Returns true when the data dependences have been computed, false otherwise.
4131 Given a loop nest LOOP, the following vectors are returned:
4132 DATAREFS is initialized to all the array elements contained in this loop,
4133 DEPENDENCE_RELATIONS contains the relations between the data references.
4134 Compute read-read and self relations if
4135 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4138 compute_data_dependences_for_loop (struct loop
*loop
,
4139 bool compute_self_and_read_read_dependences
,
4140 vec
<loop_p
> *loop_nest
,
4141 vec
<data_reference_p
> *datarefs
,
4142 vec
<ddr_p
> *dependence_relations
)
4146 memset (&dependence_stats
, 0, sizeof (dependence_stats
));
4148 /* If the loop nest is not well formed, or one of the data references
4149 is not computable, give up without spending time to compute other
4152 || !find_loop_nest (loop
, loop_nest
)
4153 || find_data_references_in_loop (loop
, datarefs
) == chrec_dont_know
4154 || !compute_all_dependences (*datarefs
, dependence_relations
, *loop_nest
,
4155 compute_self_and_read_read_dependences
))
4158 if (dump_file
&& (dump_flags
& TDF_STATS
))
4160 fprintf (dump_file
, "Dependence tester statistics:\n");
4162 fprintf (dump_file
, "Number of dependence tests: %d\n",
4163 dependence_stats
.num_dependence_tests
);
4164 fprintf (dump_file
, "Number of dependence tests classified dependent: %d\n",
4165 dependence_stats
.num_dependence_dependent
);
4166 fprintf (dump_file
, "Number of dependence tests classified independent: %d\n",
4167 dependence_stats
.num_dependence_independent
);
4168 fprintf (dump_file
, "Number of undetermined dependence tests: %d\n",
4169 dependence_stats
.num_dependence_undetermined
);
4171 fprintf (dump_file
, "Number of subscript tests: %d\n",
4172 dependence_stats
.num_subscript_tests
);
4173 fprintf (dump_file
, "Number of undetermined subscript tests: %d\n",
4174 dependence_stats
.num_subscript_undetermined
);
4175 fprintf (dump_file
, "Number of same subscript function: %d\n",
4176 dependence_stats
.num_same_subscript_function
);
4178 fprintf (dump_file
, "Number of ziv tests: %d\n",
4179 dependence_stats
.num_ziv
);
4180 fprintf (dump_file
, "Number of ziv tests returning dependent: %d\n",
4181 dependence_stats
.num_ziv_dependent
);
4182 fprintf (dump_file
, "Number of ziv tests returning independent: %d\n",
4183 dependence_stats
.num_ziv_independent
);
4184 fprintf (dump_file
, "Number of ziv tests unimplemented: %d\n",
4185 dependence_stats
.num_ziv_unimplemented
);
4187 fprintf (dump_file
, "Number of siv tests: %d\n",
4188 dependence_stats
.num_siv
);
4189 fprintf (dump_file
, "Number of siv tests returning dependent: %d\n",
4190 dependence_stats
.num_siv_dependent
);
4191 fprintf (dump_file
, "Number of siv tests returning independent: %d\n",
4192 dependence_stats
.num_siv_independent
);
4193 fprintf (dump_file
, "Number of siv tests unimplemented: %d\n",
4194 dependence_stats
.num_siv_unimplemented
);
4196 fprintf (dump_file
, "Number of miv tests: %d\n",
4197 dependence_stats
.num_miv
);
4198 fprintf (dump_file
, "Number of miv tests returning dependent: %d\n",
4199 dependence_stats
.num_miv_dependent
);
4200 fprintf (dump_file
, "Number of miv tests returning independent: %d\n",
4201 dependence_stats
.num_miv_independent
);
4202 fprintf (dump_file
, "Number of miv tests unimplemented: %d\n",
4203 dependence_stats
.num_miv_unimplemented
);
4209 /* Free the memory used by a data dependence relation DDR. */
4212 free_dependence_relation (struct data_dependence_relation
*ddr
)
4217 if (DDR_SUBSCRIPTS (ddr
).exists ())
4218 free_subscripts (DDR_SUBSCRIPTS (ddr
));
4219 DDR_DIST_VECTS (ddr
).release ();
4220 DDR_DIR_VECTS (ddr
).release ();
4225 /* Free the memory used by the data dependence relations from
4226 DEPENDENCE_RELATIONS. */
4229 free_dependence_relations (vec
<ddr_p
> dependence_relations
)
4232 struct data_dependence_relation
*ddr
;
4234 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
4236 free_dependence_relation (ddr
);
4238 dependence_relations
.release ();
4241 /* Free the memory used by the data references from DATAREFS. */
4244 free_data_refs (vec
<data_reference_p
> datarefs
)
4247 struct data_reference
*dr
;
4249 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
4251 datarefs
.release ();