1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
49 - to define an interface to access this data.
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
62 has an integer solution x = 1 and y = -1.
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
78 #include "coretypes.h"
87 #include "fold-const.h"
90 #include "hard-reg-set.h"
94 #include "statistics.h"
95 #include "insn-config.h"
100 #include "emit-rtl.h"
104 #include "gimple-pretty-print.h"
106 #include "dominance.h"
108 #include "basic-block.h"
109 #include "tree-ssa-alias.h"
110 #include "internal-fn.h"
111 #include "gimple-expr.h"
114 #include "gimple-iterator.h"
115 #include "tree-ssa-loop-niter.h"
116 #include "tree-ssa-loop.h"
117 #include "tree-ssa.h"
119 #include "tree-data-ref.h"
120 #include "tree-scalar-evolution.h"
121 #include "dumpfile.h"
122 #include "langhooks.h"
123 #include "tree-affine.h"
126 static struct datadep_stats
128 int num_dependence_tests
;
129 int num_dependence_dependent
;
130 int num_dependence_independent
;
131 int num_dependence_undetermined
;
133 int num_subscript_tests
;
134 int num_subscript_undetermined
;
135 int num_same_subscript_function
;
138 int num_ziv_independent
;
139 int num_ziv_dependent
;
140 int num_ziv_unimplemented
;
143 int num_siv_independent
;
144 int num_siv_dependent
;
145 int num_siv_unimplemented
;
148 int num_miv_independent
;
149 int num_miv_dependent
;
150 int num_miv_unimplemented
;
153 static bool subscript_dependence_tester_1 (struct data_dependence_relation
*,
154 struct data_reference
*,
155 struct data_reference
*,
157 /* Returns true iff A divides B. */
160 tree_fold_divides_p (const_tree a
, const_tree b
)
162 gcc_assert (TREE_CODE (a
) == INTEGER_CST
);
163 gcc_assert (TREE_CODE (b
) == INTEGER_CST
);
164 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR
, b
, a
));
167 /* Returns true iff A divides B. */
170 int_divides_p (int a
, int b
)
172 return ((b
% a
) == 0);
177 /* Dump into FILE all the data references from DATAREFS. */
180 dump_data_references (FILE *file
, vec
<data_reference_p
> datarefs
)
183 struct data_reference
*dr
;
185 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
186 dump_data_reference (file
, dr
);
189 /* Unified dump into FILE all the data references from DATAREFS. */
192 debug (vec
<data_reference_p
> &ref
)
194 dump_data_references (stderr
, ref
);
198 debug (vec
<data_reference_p
> *ptr
)
203 fprintf (stderr
, "<nil>\n");
207 /* Dump into STDERR all the data references from DATAREFS. */
210 debug_data_references (vec
<data_reference_p
> datarefs
)
212 dump_data_references (stderr
, datarefs
);
215 /* Print to STDERR the data_reference DR. */
218 debug_data_reference (struct data_reference
*dr
)
220 dump_data_reference (stderr
, dr
);
223 /* Dump function for a DATA_REFERENCE structure. */
226 dump_data_reference (FILE *outf
,
227 struct data_reference
*dr
)
231 fprintf (outf
, "#(Data Ref: \n");
232 fprintf (outf
, "# bb: %d \n", gimple_bb (DR_STMT (dr
))->index
);
233 fprintf (outf
, "# stmt: ");
234 print_gimple_stmt (outf
, DR_STMT (dr
), 0, 0);
235 fprintf (outf
, "# ref: ");
236 print_generic_stmt (outf
, DR_REF (dr
), 0);
237 fprintf (outf
, "# base_object: ");
238 print_generic_stmt (outf
, DR_BASE_OBJECT (dr
), 0);
240 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
242 fprintf (outf
, "# Access function %d: ", i
);
243 print_generic_stmt (outf
, DR_ACCESS_FN (dr
, i
), 0);
245 fprintf (outf
, "#)\n");
248 /* Unified dump function for a DATA_REFERENCE structure. */
251 debug (data_reference
&ref
)
253 dump_data_reference (stderr
, &ref
);
257 debug (data_reference
*ptr
)
262 fprintf (stderr
, "<nil>\n");
266 /* Dumps the affine function described by FN to the file OUTF. */
269 dump_affine_function (FILE *outf
, affine_fn fn
)
274 print_generic_expr (outf
, fn
[0], TDF_SLIM
);
275 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
277 fprintf (outf
, " + ");
278 print_generic_expr (outf
, coef
, TDF_SLIM
);
279 fprintf (outf
, " * x_%u", i
);
283 /* Dumps the conflict function CF to the file OUTF. */
286 dump_conflict_function (FILE *outf
, conflict_function
*cf
)
290 if (cf
->n
== NO_DEPENDENCE
)
291 fprintf (outf
, "no dependence");
292 else if (cf
->n
== NOT_KNOWN
)
293 fprintf (outf
, "not known");
296 for (i
= 0; i
< cf
->n
; i
++)
301 dump_affine_function (outf
, cf
->fns
[i
]);
307 /* Dump function for a SUBSCRIPT structure. */
310 dump_subscript (FILE *outf
, struct subscript
*subscript
)
312 conflict_function
*cf
= SUB_CONFLICTS_IN_A (subscript
);
314 fprintf (outf
, "\n (subscript \n");
315 fprintf (outf
, " iterations_that_access_an_element_twice_in_A: ");
316 dump_conflict_function (outf
, cf
);
317 if (CF_NONTRIVIAL_P (cf
))
319 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
320 fprintf (outf
, "\n last_conflict: ");
321 print_generic_expr (outf
, last_iteration
, 0);
324 cf
= SUB_CONFLICTS_IN_B (subscript
);
325 fprintf (outf
, "\n iterations_that_access_an_element_twice_in_B: ");
326 dump_conflict_function (outf
, cf
);
327 if (CF_NONTRIVIAL_P (cf
))
329 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
330 fprintf (outf
, "\n last_conflict: ");
331 print_generic_expr (outf
, last_iteration
, 0);
334 fprintf (outf
, "\n (Subscript distance: ");
335 print_generic_expr (outf
, SUB_DISTANCE (subscript
), 0);
336 fprintf (outf
, " ))\n");
339 /* Print the classic direction vector DIRV to OUTF. */
342 print_direction_vector (FILE *outf
,
348 for (eq
= 0; eq
< length
; eq
++)
350 enum data_dependence_direction dir
= ((enum data_dependence_direction
)
356 fprintf (outf
, " +");
359 fprintf (outf
, " -");
362 fprintf (outf
, " =");
364 case dir_positive_or_equal
:
365 fprintf (outf
, " +=");
367 case dir_positive_or_negative
:
368 fprintf (outf
, " +-");
370 case dir_negative_or_equal
:
371 fprintf (outf
, " -=");
374 fprintf (outf
, " *");
377 fprintf (outf
, "indep");
381 fprintf (outf
, "\n");
384 /* Print a vector of direction vectors. */
387 print_dir_vectors (FILE *outf
, vec
<lambda_vector
> dir_vects
,
393 FOR_EACH_VEC_ELT (dir_vects
, j
, v
)
394 print_direction_vector (outf
, v
, length
);
397 /* Print out a vector VEC of length N to OUTFILE. */
400 print_lambda_vector (FILE * outfile
, lambda_vector vector
, int n
)
404 for (i
= 0; i
< n
; i
++)
405 fprintf (outfile
, "%3d ", vector
[i
]);
406 fprintf (outfile
, "\n");
409 /* Print a vector of distance vectors. */
412 print_dist_vectors (FILE *outf
, vec
<lambda_vector
> dist_vects
,
418 FOR_EACH_VEC_ELT (dist_vects
, j
, v
)
419 print_lambda_vector (outf
, v
, length
);
422 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
425 dump_data_dependence_relation (FILE *outf
,
426 struct data_dependence_relation
*ddr
)
428 struct data_reference
*dra
, *drb
;
430 fprintf (outf
, "(Data Dep: \n");
432 if (!ddr
|| DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
439 dump_data_reference (outf
, dra
);
441 fprintf (outf
, " (nil)\n");
443 dump_data_reference (outf
, drb
);
445 fprintf (outf
, " (nil)\n");
447 fprintf (outf
, " (don't know)\n)\n");
453 dump_data_reference (outf
, dra
);
454 dump_data_reference (outf
, drb
);
456 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
457 fprintf (outf
, " (no dependence)\n");
459 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
464 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
466 fprintf (outf
, " access_fn_A: ");
467 print_generic_stmt (outf
, DR_ACCESS_FN (dra
, i
), 0);
468 fprintf (outf
, " access_fn_B: ");
469 print_generic_stmt (outf
, DR_ACCESS_FN (drb
, i
), 0);
470 dump_subscript (outf
, DDR_SUBSCRIPT (ddr
, i
));
473 fprintf (outf
, " inner loop index: %d\n", DDR_INNER_LOOP (ddr
));
474 fprintf (outf
, " loop nest: (");
475 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr
), i
, loopi
)
476 fprintf (outf
, "%d ", loopi
->num
);
477 fprintf (outf
, ")\n");
479 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
481 fprintf (outf
, " distance_vector: ");
482 print_lambda_vector (outf
, DDR_DIST_VECT (ddr
, i
),
486 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
488 fprintf (outf
, " direction_vector: ");
489 print_direction_vector (outf
, DDR_DIR_VECT (ddr
, i
),
494 fprintf (outf
, ")\n");
500 debug_data_dependence_relation (struct data_dependence_relation
*ddr
)
502 dump_data_dependence_relation (stderr
, ddr
);
505 /* Dump into FILE all the dependence relations from DDRS. */
508 dump_data_dependence_relations (FILE *file
,
512 struct data_dependence_relation
*ddr
;
514 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
515 dump_data_dependence_relation (file
, ddr
);
519 debug (vec
<ddr_p
> &ref
)
521 dump_data_dependence_relations (stderr
, ref
);
525 debug (vec
<ddr_p
> *ptr
)
530 fprintf (stderr
, "<nil>\n");
534 /* Dump to STDERR all the dependence relations from DDRS. */
537 debug_data_dependence_relations (vec
<ddr_p
> ddrs
)
539 dump_data_dependence_relations (stderr
, ddrs
);
542 /* Dumps the distance and direction vectors in FILE. DDRS contains
543 the dependence relations, and VECT_SIZE is the size of the
544 dependence vectors, or in other words the number of loops in the
548 dump_dist_dir_vectors (FILE *file
, vec
<ddr_p
> ddrs
)
551 struct data_dependence_relation
*ddr
;
554 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
555 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
&& DDR_AFFINE_P (ddr
))
557 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), j
, v
)
559 fprintf (file
, "DISTANCE_V (");
560 print_lambda_vector (file
, v
, DDR_NB_LOOPS (ddr
));
561 fprintf (file
, ")\n");
564 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), j
, v
)
566 fprintf (file
, "DIRECTION_V (");
567 print_direction_vector (file
, v
, DDR_NB_LOOPS (ddr
));
568 fprintf (file
, ")\n");
572 fprintf (file
, "\n\n");
575 /* Dumps the data dependence relations DDRS in FILE. */
578 dump_ddrs (FILE *file
, vec
<ddr_p
> ddrs
)
581 struct data_dependence_relation
*ddr
;
583 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
584 dump_data_dependence_relation (file
, ddr
);
586 fprintf (file
, "\n\n");
590 debug_ddrs (vec
<ddr_p
> ddrs
)
592 dump_ddrs (stderr
, ddrs
);
595 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
596 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
597 constant of type ssizetype, and returns true. If we cannot do this
598 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
602 split_constant_offset_1 (tree type
, tree op0
, enum tree_code code
, tree op1
,
603 tree
*var
, tree
*off
)
607 enum tree_code ocode
= code
;
615 *var
= build_int_cst (type
, 0);
616 *off
= fold_convert (ssizetype
, op0
);
619 case POINTER_PLUS_EXPR
:
624 split_constant_offset (op0
, &var0
, &off0
);
625 split_constant_offset (op1
, &var1
, &off1
);
626 *var
= fold_build2 (code
, type
, var0
, var1
);
627 *off
= size_binop (ocode
, off0
, off1
);
631 if (TREE_CODE (op1
) != INTEGER_CST
)
634 split_constant_offset (op0
, &var0
, &off0
);
635 *var
= fold_build2 (MULT_EXPR
, type
, var0
, op1
);
636 *off
= size_binop (MULT_EXPR
, off0
, fold_convert (ssizetype
, op1
));
642 HOST_WIDE_INT pbitsize
, pbitpos
;
644 int punsignedp
, pvolatilep
;
646 op0
= TREE_OPERAND (op0
, 0);
647 base
= get_inner_reference (op0
, &pbitsize
, &pbitpos
, &poffset
,
648 &pmode
, &punsignedp
, &pvolatilep
, false);
650 if (pbitpos
% BITS_PER_UNIT
!= 0)
652 base
= build_fold_addr_expr (base
);
653 off0
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
657 split_constant_offset (poffset
, &poffset
, &off1
);
658 off0
= size_binop (PLUS_EXPR
, off0
, off1
);
659 if (POINTER_TYPE_P (TREE_TYPE (base
)))
660 base
= fold_build_pointer_plus (base
, poffset
);
662 base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base
), base
,
663 fold_convert (TREE_TYPE (base
), poffset
));
666 var0
= fold_convert (type
, base
);
668 /* If variable length types are involved, punt, otherwise casts
669 might be converted into ARRAY_REFs in gimplify_conversion.
670 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
671 possibly no longer appears in current GIMPLE, might resurface.
672 This perhaps could run
673 if (CONVERT_EXPR_P (var0))
675 gimplify_conversion (&var0);
676 // Attempt to fill in any within var0 found ARRAY_REF's
677 // element size from corresponding op embedded ARRAY_REF,
678 // if unsuccessful, just punt.
680 while (POINTER_TYPE_P (type
))
681 type
= TREE_TYPE (type
);
682 if (int_size_in_bytes (type
) < 0)
692 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0
))
695 gimple def_stmt
= SSA_NAME_DEF_STMT (op0
);
696 enum tree_code subcode
;
698 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
701 var0
= gimple_assign_rhs1 (def_stmt
);
702 subcode
= gimple_assign_rhs_code (def_stmt
);
703 var1
= gimple_assign_rhs2 (def_stmt
);
705 return split_constant_offset_1 (type
, var0
, subcode
, var1
, var
, off
);
709 /* We must not introduce undefined overflow, and we must not change the value.
710 Hence we're okay if the inner type doesn't overflow to start with
711 (pointer or signed), the outer type also is an integer or pointer
712 and the outer precision is at least as large as the inner. */
713 tree itype
= TREE_TYPE (op0
);
714 if ((POINTER_TYPE_P (itype
)
715 || (INTEGRAL_TYPE_P (itype
) && TYPE_OVERFLOW_UNDEFINED (itype
)))
716 && TYPE_PRECISION (type
) >= TYPE_PRECISION (itype
)
717 && (POINTER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)))
719 split_constant_offset (op0
, &var0
, off
);
720 *var
= fold_convert (type
, var0
);
731 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
732 will be ssizetype. */
735 split_constant_offset (tree exp
, tree
*var
, tree
*off
)
737 tree type
= TREE_TYPE (exp
), otype
, op0
, op1
, e
, o
;
741 *off
= ssize_int (0);
744 if (tree_is_chrec (exp
)
745 || get_gimple_rhs_class (TREE_CODE (exp
)) == GIMPLE_TERNARY_RHS
)
748 otype
= TREE_TYPE (exp
);
749 code
= TREE_CODE (exp
);
750 extract_ops_from_tree (exp
, &code
, &op0
, &op1
);
751 if (split_constant_offset_1 (otype
, op0
, code
, op1
, &e
, &o
))
753 *var
= fold_convert (type
, e
);
758 /* Returns the address ADDR of an object in a canonical shape (without nop
759 casts, and with type of pointer to the object). */
762 canonicalize_base_object_address (tree addr
)
768 /* The base address may be obtained by casting from integer, in that case
770 if (!POINTER_TYPE_P (TREE_TYPE (addr
)))
773 if (TREE_CODE (addr
) != ADDR_EXPR
)
776 return build_fold_addr_expr (TREE_OPERAND (addr
, 0));
779 /* Analyzes the behavior of the memory reference DR in the innermost loop or
780 basic block that contains it. Returns true if analysis succeed or false
784 dr_analyze_innermost (struct data_reference
*dr
, struct loop
*nest
)
786 gimple stmt
= DR_STMT (dr
);
787 struct loop
*loop
= loop_containing_stmt (stmt
);
788 tree ref
= DR_REF (dr
);
789 HOST_WIDE_INT pbitsize
, pbitpos
;
792 int punsignedp
, pvolatilep
;
793 affine_iv base_iv
, offset_iv
;
794 tree init
, dinit
, step
;
795 bool in_loop
= (loop
&& loop
->num
);
797 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
798 fprintf (dump_file
, "analyze_innermost: ");
800 base
= get_inner_reference (ref
, &pbitsize
, &pbitpos
, &poffset
,
801 &pmode
, &punsignedp
, &pvolatilep
, false);
802 gcc_assert (base
!= NULL_TREE
);
804 if (pbitpos
% BITS_PER_UNIT
!= 0)
806 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
807 fprintf (dump_file
, "failed: bit offset alignment.\n");
811 if (TREE_CODE (base
) == MEM_REF
)
813 if (!integer_zerop (TREE_OPERAND (base
, 1)))
815 offset_int moff
= mem_ref_offset (base
);
816 tree mofft
= wide_int_to_tree (sizetype
, moff
);
820 poffset
= size_binop (PLUS_EXPR
, poffset
, mofft
);
822 base
= TREE_OPERAND (base
, 0);
825 base
= build_fold_addr_expr (base
);
829 if (!simple_iv (loop
, loop_containing_stmt (stmt
), base
, &base_iv
,
830 nest
? true : false))
834 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
835 fprintf (dump_file
, "failed: evolution of base is not"
842 base_iv
.step
= ssize_int (0);
843 base_iv
.no_overflow
= true;
850 base_iv
.step
= ssize_int (0);
851 base_iv
.no_overflow
= true;
856 offset_iv
.base
= ssize_int (0);
857 offset_iv
.step
= ssize_int (0);
863 offset_iv
.base
= poffset
;
864 offset_iv
.step
= ssize_int (0);
866 else if (!simple_iv (loop
, loop_containing_stmt (stmt
),
868 nest
? true : false))
872 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
873 fprintf (dump_file
, "failed: evolution of offset is not"
879 offset_iv
.base
= poffset
;
880 offset_iv
.step
= ssize_int (0);
885 init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
886 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
887 init
= size_binop (PLUS_EXPR
, init
, dinit
);
888 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
889 init
= size_binop (PLUS_EXPR
, init
, dinit
);
891 step
= size_binop (PLUS_EXPR
,
892 fold_convert (ssizetype
, base_iv
.step
),
893 fold_convert (ssizetype
, offset_iv
.step
));
895 DR_BASE_ADDRESS (dr
) = canonicalize_base_object_address (base_iv
.base
);
897 DR_OFFSET (dr
) = fold_convert (ssizetype
, offset_iv
.base
);
901 DR_ALIGNED_TO (dr
) = size_int (highest_pow2_factor (offset_iv
.base
));
903 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
904 fprintf (dump_file
, "success.\n");
909 /* Determines the base object and the list of indices of memory reference
910 DR, analyzed in LOOP and instantiated in loop nest NEST. */
913 dr_analyze_indices (struct data_reference
*dr
, loop_p nest
, loop_p loop
)
915 vec
<tree
> access_fns
= vNULL
;
917 tree base
, off
, access_fn
;
918 basic_block before_loop
;
920 /* If analyzing a basic-block there are no indices to analyze
921 and thus no access functions. */
924 DR_BASE_OBJECT (dr
) = DR_REF (dr
);
925 DR_ACCESS_FNS (dr
).create (0);
930 before_loop
= block_before_loop (nest
);
932 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
933 into a two element array with a constant index. The base is
934 then just the immediate underlying object. */
935 if (TREE_CODE (ref
) == REALPART_EXPR
)
937 ref
= TREE_OPERAND (ref
, 0);
938 access_fns
.safe_push (integer_zero_node
);
940 else if (TREE_CODE (ref
) == IMAGPART_EXPR
)
942 ref
= TREE_OPERAND (ref
, 0);
943 access_fns
.safe_push (integer_one_node
);
946 /* Analyze access functions of dimensions we know to be independent. */
947 while (handled_component_p (ref
))
949 if (TREE_CODE (ref
) == ARRAY_REF
)
951 op
= TREE_OPERAND (ref
, 1);
952 access_fn
= analyze_scalar_evolution (loop
, op
);
953 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
954 access_fns
.safe_push (access_fn
);
956 else if (TREE_CODE (ref
) == COMPONENT_REF
957 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref
, 0))) == RECORD_TYPE
)
959 /* For COMPONENT_REFs of records (but not unions!) use the
960 FIELD_DECL offset as constant access function so we can
961 disambiguate a[i].f1 and a[i].f2. */
962 tree off
= component_ref_field_offset (ref
);
963 off
= size_binop (PLUS_EXPR
,
964 size_binop (MULT_EXPR
,
965 fold_convert (bitsizetype
, off
),
966 bitsize_int (BITS_PER_UNIT
)),
967 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref
, 1)));
968 access_fns
.safe_push (off
);
971 /* If we have an unhandled component we could not translate
972 to an access function stop analyzing. We have determined
973 our base object in this case. */
976 ref
= TREE_OPERAND (ref
, 0);
979 /* If the address operand of a MEM_REF base has an evolution in the
980 analyzed nest, add it as an additional independent access-function. */
981 if (TREE_CODE (ref
) == MEM_REF
)
983 op
= TREE_OPERAND (ref
, 0);
984 access_fn
= analyze_scalar_evolution (loop
, op
);
985 access_fn
= instantiate_scev (before_loop
, loop
, access_fn
);
986 if (TREE_CODE (access_fn
) == POLYNOMIAL_CHREC
)
989 tree memoff
= TREE_OPERAND (ref
, 1);
990 base
= initial_condition (access_fn
);
991 orig_type
= TREE_TYPE (base
);
992 STRIP_USELESS_TYPE_CONVERSION (base
);
993 split_constant_offset (base
, &base
, &off
);
994 STRIP_USELESS_TYPE_CONVERSION (base
);
995 /* Fold the MEM_REF offset into the evolutions initial
996 value to make more bases comparable. */
997 if (!integer_zerop (memoff
))
999 off
= size_binop (PLUS_EXPR
, off
,
1000 fold_convert (ssizetype
, memoff
));
1001 memoff
= build_int_cst (TREE_TYPE (memoff
), 0);
1003 /* Adjust the offset so it is a multiple of the access type
1004 size and thus we separate bases that can possibly be used
1005 to produce partial overlaps (which the access_fn machinery
1008 if (TYPE_SIZE_UNIT (TREE_TYPE (ref
))
1009 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref
))) == INTEGER_CST
1010 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref
))))
1011 rem
= wi::mod_trunc (off
, TYPE_SIZE_UNIT (TREE_TYPE (ref
)), SIGNED
);
1013 /* If we can't compute the remainder simply force the initial
1014 condition to zero. */
1016 off
= wide_int_to_tree (ssizetype
, wi::sub (off
, rem
));
1017 memoff
= wide_int_to_tree (TREE_TYPE (memoff
), rem
);
1018 /* And finally replace the initial condition. */
1019 access_fn
= chrec_replace_initial_condition
1020 (access_fn
, fold_convert (orig_type
, off
));
1021 /* ??? This is still not a suitable base object for
1022 dr_may_alias_p - the base object needs to be an
1023 access that covers the object as whole. With
1024 an evolution in the pointer this cannot be
1026 As a band-aid, mark the access so we can special-case
1027 it in dr_may_alias_p. */
1029 ref
= fold_build2_loc (EXPR_LOCATION (ref
),
1030 MEM_REF
, TREE_TYPE (ref
),
1032 MR_DEPENDENCE_CLIQUE (ref
) = MR_DEPENDENCE_CLIQUE (old
);
1033 MR_DEPENDENCE_BASE (ref
) = MR_DEPENDENCE_BASE (old
);
1034 DR_UNCONSTRAINED_BASE (dr
) = true;
1035 access_fns
.safe_push (access_fn
);
1038 else if (DECL_P (ref
))
1040 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1041 ref
= build2 (MEM_REF
, TREE_TYPE (ref
),
1042 build_fold_addr_expr (ref
),
1043 build_int_cst (reference_alias_ptr_type (ref
), 0));
1046 DR_BASE_OBJECT (dr
) = ref
;
1047 DR_ACCESS_FNS (dr
) = access_fns
;
1050 /* Extracts the alias analysis information from the memory reference DR. */
1053 dr_analyze_alias (struct data_reference
*dr
)
1055 tree ref
= DR_REF (dr
);
1056 tree base
= get_base_address (ref
), addr
;
1058 if (INDIRECT_REF_P (base
)
1059 || TREE_CODE (base
) == MEM_REF
)
1061 addr
= TREE_OPERAND (base
, 0);
1062 if (TREE_CODE (addr
) == SSA_NAME
)
1063 DR_PTR_INFO (dr
) = SSA_NAME_PTR_INFO (addr
);
1067 /* Frees data reference DR. */
1070 free_data_ref (data_reference_p dr
)
1072 DR_ACCESS_FNS (dr
).release ();
1076 /* Analyzes memory reference MEMREF accessed in STMT. The reference
1077 is read if IS_READ is true, write otherwise. Returns the
1078 data_reference description of MEMREF. NEST is the outermost loop
1079 in which the reference should be instantiated, LOOP is the loop in
1080 which the data reference should be analyzed. */
1082 struct data_reference
*
1083 create_data_ref (loop_p nest
, loop_p loop
, tree memref
, gimple stmt
,
1086 struct data_reference
*dr
;
1088 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1090 fprintf (dump_file
, "Creating dr for ");
1091 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
1092 fprintf (dump_file
, "\n");
1095 dr
= XCNEW (struct data_reference
);
1096 DR_STMT (dr
) = stmt
;
1097 DR_REF (dr
) = memref
;
1098 DR_IS_READ (dr
) = is_read
;
1100 dr_analyze_innermost (dr
, nest
);
1101 dr_analyze_indices (dr
, nest
, loop
);
1102 dr_analyze_alias (dr
);
1104 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1107 fprintf (dump_file
, "\tbase_address: ");
1108 print_generic_expr (dump_file
, DR_BASE_ADDRESS (dr
), TDF_SLIM
);
1109 fprintf (dump_file
, "\n\toffset from base address: ");
1110 print_generic_expr (dump_file
, DR_OFFSET (dr
), TDF_SLIM
);
1111 fprintf (dump_file
, "\n\tconstant offset from base address: ");
1112 print_generic_expr (dump_file
, DR_INIT (dr
), TDF_SLIM
);
1113 fprintf (dump_file
, "\n\tstep: ");
1114 print_generic_expr (dump_file
, DR_STEP (dr
), TDF_SLIM
);
1115 fprintf (dump_file
, "\n\taligned to: ");
1116 print_generic_expr (dump_file
, DR_ALIGNED_TO (dr
), TDF_SLIM
);
1117 fprintf (dump_file
, "\n\tbase_object: ");
1118 print_generic_expr (dump_file
, DR_BASE_OBJECT (dr
), TDF_SLIM
);
1119 fprintf (dump_file
, "\n");
1120 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
1122 fprintf (dump_file
, "\tAccess function %d: ", i
);
1123 print_generic_stmt (dump_file
, DR_ACCESS_FN (dr
, i
), TDF_SLIM
);
1130 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1133 dr_equal_offsets_p1 (tree offset1
, tree offset2
)
1137 STRIP_NOPS (offset1
);
1138 STRIP_NOPS (offset2
);
1140 if (offset1
== offset2
)
1143 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
1144 || (!BINARY_CLASS_P (offset1
) && !UNARY_CLASS_P (offset1
)))
1147 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 0),
1148 TREE_OPERAND (offset2
, 0));
1150 if (!res
|| !BINARY_CLASS_P (offset1
))
1153 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 1),
1154 TREE_OPERAND (offset2
, 1));
1159 /* Check if DRA and DRB have equal offsets. */
1161 dr_equal_offsets_p (struct data_reference
*dra
,
1162 struct data_reference
*drb
)
1164 tree offset1
, offset2
;
1166 offset1
= DR_OFFSET (dra
);
1167 offset2
= DR_OFFSET (drb
);
1169 return dr_equal_offsets_p1 (offset1
, offset2
);
1172 /* Returns true if FNA == FNB. */
1175 affine_function_equal_p (affine_fn fna
, affine_fn fnb
)
1177 unsigned i
, n
= fna
.length ();
1179 if (n
!= fnb
.length ())
1182 for (i
= 0; i
< n
; i
++)
1183 if (!operand_equal_p (fna
[i
], fnb
[i
], 0))
1189 /* If all the functions in CF are the same, returns one of them,
1190 otherwise returns NULL. */
1193 common_affine_function (conflict_function
*cf
)
1198 if (!CF_NONTRIVIAL_P (cf
))
1199 return affine_fn ();
1203 for (i
= 1; i
< cf
->n
; i
++)
1204 if (!affine_function_equal_p (comm
, cf
->fns
[i
]))
1205 return affine_fn ();
1210 /* Returns the base of the affine function FN. */
1213 affine_function_base (affine_fn fn
)
1218 /* Returns true if FN is a constant. */
1221 affine_function_constant_p (affine_fn fn
)
1226 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
1227 if (!integer_zerop (coef
))
1233 /* Returns true if FN is the zero constant function. */
1236 affine_function_zero_p (affine_fn fn
)
1238 return (integer_zerop (affine_function_base (fn
))
1239 && affine_function_constant_p (fn
));
1242 /* Returns a signed integer type with the largest precision from TA
1246 signed_type_for_types (tree ta
, tree tb
)
1248 if (TYPE_PRECISION (ta
) > TYPE_PRECISION (tb
))
1249 return signed_type_for (ta
);
1251 return signed_type_for (tb
);
1254 /* Applies operation OP on affine functions FNA and FNB, and returns the
1258 affine_fn_op (enum tree_code op
, affine_fn fna
, affine_fn fnb
)
1264 if (fnb
.length () > fna
.length ())
1276 for (i
= 0; i
< n
; i
++)
1278 tree type
= signed_type_for_types (TREE_TYPE (fna
[i
]),
1279 TREE_TYPE (fnb
[i
]));
1280 ret
.quick_push (fold_build2 (op
, type
, fna
[i
], fnb
[i
]));
1283 for (; fna
.iterate (i
, &coef
); i
++)
1284 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1285 coef
, integer_zero_node
));
1286 for (; fnb
.iterate (i
, &coef
); i
++)
1287 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
1288 integer_zero_node
, coef
));
1293 /* Returns the sum of affine functions FNA and FNB. */
1296 affine_fn_plus (affine_fn fna
, affine_fn fnb
)
1298 return affine_fn_op (PLUS_EXPR
, fna
, fnb
);
1301 /* Returns the difference of affine functions FNA and FNB. */
1304 affine_fn_minus (affine_fn fna
, affine_fn fnb
)
1306 return affine_fn_op (MINUS_EXPR
, fna
, fnb
);
1309 /* Frees affine function FN. */
1312 affine_fn_free (affine_fn fn
)
1317 /* Determine for each subscript in the data dependence relation DDR
1321 compute_subscript_distance (struct data_dependence_relation
*ddr
)
1323 conflict_function
*cf_a
, *cf_b
;
1324 affine_fn fn_a
, fn_b
, diff
;
1326 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1330 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
1332 struct subscript
*subscript
;
1334 subscript
= DDR_SUBSCRIPT (ddr
, i
);
1335 cf_a
= SUB_CONFLICTS_IN_A (subscript
);
1336 cf_b
= SUB_CONFLICTS_IN_B (subscript
);
1338 fn_a
= common_affine_function (cf_a
);
1339 fn_b
= common_affine_function (cf_b
);
1340 if (!fn_a
.exists () || !fn_b
.exists ())
1342 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1345 diff
= affine_fn_minus (fn_a
, fn_b
);
1347 if (affine_function_constant_p (diff
))
1348 SUB_DISTANCE (subscript
) = affine_function_base (diff
);
1350 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1352 affine_fn_free (diff
);
1357 /* Returns the conflict function for "unknown". */
1359 static conflict_function
*
1360 conflict_fn_not_known (void)
1362 conflict_function
*fn
= XCNEW (conflict_function
);
1368 /* Returns the conflict function for "independent". */
1370 static conflict_function
*
1371 conflict_fn_no_dependence (void)
1373 conflict_function
*fn
= XCNEW (conflict_function
);
1374 fn
->n
= NO_DEPENDENCE
;
1379 /* Returns true if the address of OBJ is invariant in LOOP. */
1382 object_address_invariant_in_loop_p (const struct loop
*loop
, const_tree obj
)
1384 while (handled_component_p (obj
))
1386 if (TREE_CODE (obj
) == ARRAY_REF
)
1388 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1389 need to check the stride and the lower bound of the reference. */
1390 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1392 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 3),
1396 else if (TREE_CODE (obj
) == COMPONENT_REF
)
1398 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
1402 obj
= TREE_OPERAND (obj
, 0);
1405 if (!INDIRECT_REF_P (obj
)
1406 && TREE_CODE (obj
) != MEM_REF
)
1409 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 0),
1413 /* Returns false if we can prove that data references A and B do not alias,
1414 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1418 dr_may_alias_p (const struct data_reference
*a
, const struct data_reference
*b
,
1421 tree addr_a
= DR_BASE_OBJECT (a
);
1422 tree addr_b
= DR_BASE_OBJECT (b
);
1424 /* If we are not processing a loop nest but scalar code we
1425 do not need to care about possible cross-iteration dependences
1426 and thus can process the full original reference. Do so,
1427 similar to how loop invariant motion applies extra offset-based
1431 aff_tree off1
, off2
;
1432 widest_int size1
, size2
;
1433 get_inner_reference_aff (DR_REF (a
), &off1
, &size1
);
1434 get_inner_reference_aff (DR_REF (b
), &off2
, &size2
);
1435 aff_combination_scale (&off1
, -1);
1436 aff_combination_add (&off2
, &off1
);
1437 if (aff_comb_cannot_overlap_p (&off2
, size1
, size2
))
1441 if ((TREE_CODE (addr_a
) == MEM_REF
|| TREE_CODE (addr_a
) == TARGET_MEM_REF
)
1442 && (TREE_CODE (addr_b
) == MEM_REF
|| TREE_CODE (addr_b
) == TARGET_MEM_REF
)
1443 && MR_DEPENDENCE_CLIQUE (addr_a
) == MR_DEPENDENCE_CLIQUE (addr_b
)
1444 && MR_DEPENDENCE_BASE (addr_a
) != MR_DEPENDENCE_BASE (addr_b
))
1447 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
1448 do not know the size of the base-object. So we cannot do any
1449 offset/overlap based analysis but have to rely on points-to
1450 information only. */
1451 if (TREE_CODE (addr_a
) == MEM_REF
1452 && (DR_UNCONSTRAINED_BASE (a
)
1453 || TREE_CODE (TREE_OPERAND (addr_a
, 0)) == SSA_NAME
))
1455 /* For true dependences we can apply TBAA. */
1456 if (flag_strict_aliasing
1457 && DR_IS_WRITE (a
) && DR_IS_READ (b
)
1458 && !alias_sets_conflict_p (get_alias_set (DR_REF (a
)),
1459 get_alias_set (DR_REF (b
))))
1461 if (TREE_CODE (addr_b
) == MEM_REF
)
1462 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
1463 TREE_OPERAND (addr_b
, 0));
1465 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
1466 build_fold_addr_expr (addr_b
));
1468 else if (TREE_CODE (addr_b
) == MEM_REF
1469 && (DR_UNCONSTRAINED_BASE (b
)
1470 || TREE_CODE (TREE_OPERAND (addr_b
, 0)) == SSA_NAME
))
1472 /* For true dependences we can apply TBAA. */
1473 if (flag_strict_aliasing
1474 && DR_IS_WRITE (a
) && DR_IS_READ (b
)
1475 && !alias_sets_conflict_p (get_alias_set (DR_REF (a
)),
1476 get_alias_set (DR_REF (b
))))
1478 if (TREE_CODE (addr_a
) == MEM_REF
)
1479 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
1480 TREE_OPERAND (addr_b
, 0));
1482 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a
),
1483 TREE_OPERAND (addr_b
, 0));
1486 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1487 that is being subsetted in the loop nest. */
1488 if (DR_IS_WRITE (a
) && DR_IS_WRITE (b
))
1489 return refs_output_dependent_p (addr_a
, addr_b
);
1490 else if (DR_IS_READ (a
) && DR_IS_WRITE (b
))
1491 return refs_anti_dependent_p (addr_a
, addr_b
);
1492 return refs_may_alias_p (addr_a
, addr_b
);
1495 /* Initialize a data dependence relation between data accesses A and
1496 B. NB_LOOPS is the number of loops surrounding the references: the
1497 size of the classic distance/direction vectors. */
1499 struct data_dependence_relation
*
1500 initialize_data_dependence_relation (struct data_reference
*a
,
1501 struct data_reference
*b
,
1502 vec
<loop_p
> loop_nest
)
1504 struct data_dependence_relation
*res
;
1507 res
= XNEW (struct data_dependence_relation
);
1510 DDR_LOOP_NEST (res
).create (0);
1511 DDR_REVERSED_P (res
) = false;
1512 DDR_SUBSCRIPTS (res
).create (0);
1513 DDR_DIR_VECTS (res
).create (0);
1514 DDR_DIST_VECTS (res
).create (0);
1516 if (a
== NULL
|| b
== NULL
)
1518 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1522 /* If the data references do not alias, then they are independent. */
1523 if (!dr_may_alias_p (a
, b
, loop_nest
.exists ()))
1525 DDR_ARE_DEPENDENT (res
) = chrec_known
;
1529 /* The case where the references are exactly the same. */
1530 if (operand_equal_p (DR_REF (a
), DR_REF (b
), 0))
1532 if (loop_nest
.exists ()
1533 && !object_address_invariant_in_loop_p (loop_nest
[0],
1534 DR_BASE_OBJECT (a
)))
1536 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1539 DDR_AFFINE_P (res
) = true;
1540 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1541 DDR_SUBSCRIPTS (res
).create (DR_NUM_DIMENSIONS (a
));
1542 DDR_LOOP_NEST (res
) = loop_nest
;
1543 DDR_INNER_LOOP (res
) = 0;
1544 DDR_SELF_REFERENCE (res
) = true;
1545 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1547 struct subscript
*subscript
;
1549 subscript
= XNEW (struct subscript
);
1550 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1551 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1552 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1553 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1554 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
1559 /* If the references do not access the same object, we do not know
1560 whether they alias or not. */
1561 if (!operand_equal_p (DR_BASE_OBJECT (a
), DR_BASE_OBJECT (b
), 0))
1563 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1567 /* If the base of the object is not invariant in the loop nest, we cannot
1568 analyze it. TODO -- in fact, it would suffice to record that there may
1569 be arbitrary dependences in the loops where the base object varies. */
1570 if (loop_nest
.exists ()
1571 && !object_address_invariant_in_loop_p (loop_nest
[0],
1572 DR_BASE_OBJECT (a
)))
1574 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1578 /* If the number of dimensions of the access to not agree we can have
1579 a pointer access to a component of the array element type and an
1580 array access while the base-objects are still the same. Punt. */
1581 if (DR_NUM_DIMENSIONS (a
) != DR_NUM_DIMENSIONS (b
))
1583 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
1587 DDR_AFFINE_P (res
) = true;
1588 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
1589 DDR_SUBSCRIPTS (res
).create (DR_NUM_DIMENSIONS (a
));
1590 DDR_LOOP_NEST (res
) = loop_nest
;
1591 DDR_INNER_LOOP (res
) = 0;
1592 DDR_SELF_REFERENCE (res
) = false;
1594 for (i
= 0; i
< DR_NUM_DIMENSIONS (a
); i
++)
1596 struct subscript
*subscript
;
1598 subscript
= XNEW (struct subscript
);
1599 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
1600 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
1601 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
1602 SUB_DISTANCE (subscript
) = chrec_dont_know
;
1603 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
1609 /* Frees memory used by the conflict function F. */
1612 free_conflict_function (conflict_function
*f
)
1616 if (CF_NONTRIVIAL_P (f
))
1618 for (i
= 0; i
< f
->n
; i
++)
1619 affine_fn_free (f
->fns
[i
]);
1624 /* Frees memory used by SUBSCRIPTS. */
1627 free_subscripts (vec
<subscript_p
> subscripts
)
1632 FOR_EACH_VEC_ELT (subscripts
, i
, s
)
1634 free_conflict_function (s
->conflicting_iterations_in_a
);
1635 free_conflict_function (s
->conflicting_iterations_in_b
);
1638 subscripts
.release ();
1641 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1645 finalize_ddr_dependent (struct data_dependence_relation
*ddr
,
1648 DDR_ARE_DEPENDENT (ddr
) = chrec
;
1649 free_subscripts (DDR_SUBSCRIPTS (ddr
));
1650 DDR_SUBSCRIPTS (ddr
).create (0);
1653 /* The dependence relation DDR cannot be represented by a distance
1657 non_affine_dependence_relation (struct data_dependence_relation
*ddr
)
1659 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1660 fprintf (dump_file
, "(Dependence relation cannot be represented by distance vector.) \n");
1662 DDR_AFFINE_P (ddr
) = false;
1667 /* This section contains the classic Banerjee tests. */
1669 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1670 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1673 ziv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1675 return (evolution_function_is_constant_p (chrec_a
)
1676 && evolution_function_is_constant_p (chrec_b
));
1679 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1680 variable, i.e., if the SIV (Single Index Variable) test is true. */
1683 siv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
1685 if ((evolution_function_is_constant_p (chrec_a
)
1686 && evolution_function_is_univariate_p (chrec_b
))
1687 || (evolution_function_is_constant_p (chrec_b
)
1688 && evolution_function_is_univariate_p (chrec_a
)))
1691 if (evolution_function_is_univariate_p (chrec_a
)
1692 && evolution_function_is_univariate_p (chrec_b
))
1694 switch (TREE_CODE (chrec_a
))
1696 case POLYNOMIAL_CHREC
:
1697 switch (TREE_CODE (chrec_b
))
1699 case POLYNOMIAL_CHREC
:
1700 if (CHREC_VARIABLE (chrec_a
) != CHREC_VARIABLE (chrec_b
))
1715 /* Creates a conflict function with N dimensions. The affine functions
1716 in each dimension follow. */
1718 static conflict_function
*
1719 conflict_fn (unsigned n
, ...)
1722 conflict_function
*ret
= XCNEW (conflict_function
);
1725 gcc_assert (0 < n
&& n
<= MAX_DIM
);
1729 for (i
= 0; i
< n
; i
++)
1730 ret
->fns
[i
] = va_arg (ap
, affine_fn
);
1736 /* Returns constant affine function with value CST. */
1739 affine_fn_cst (tree cst
)
1743 fn
.quick_push (cst
);
1747 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1750 affine_fn_univar (tree cst
, unsigned dim
, tree coef
)
1753 fn
.create (dim
+ 1);
1756 gcc_assert (dim
> 0);
1757 fn
.quick_push (cst
);
1758 for (i
= 1; i
< dim
; i
++)
1759 fn
.quick_push (integer_zero_node
);
1760 fn
.quick_push (coef
);
1764 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1765 *OVERLAPS_B are initialized to the functions that describe the
1766 relation between the elements accessed twice by CHREC_A and
1767 CHREC_B. For k >= 0, the following property is verified:
1769 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1772 analyze_ziv_subscript (tree chrec_a
,
1774 conflict_function
**overlaps_a
,
1775 conflict_function
**overlaps_b
,
1776 tree
*last_conflicts
)
1778 tree type
, difference
;
1779 dependence_stats
.num_ziv
++;
1781 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1782 fprintf (dump_file
, "(analyze_ziv_subscript \n");
1784 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1785 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1786 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1787 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
1789 switch (TREE_CODE (difference
))
1792 if (integer_zerop (difference
))
1794 /* The difference is equal to zero: the accessed index
1795 overlaps for each iteration in the loop. */
1796 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1797 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1798 *last_conflicts
= chrec_dont_know
;
1799 dependence_stats
.num_ziv_dependent
++;
1803 /* The accesses do not overlap. */
1804 *overlaps_a
= conflict_fn_no_dependence ();
1805 *overlaps_b
= conflict_fn_no_dependence ();
1806 *last_conflicts
= integer_zero_node
;
1807 dependence_stats
.num_ziv_independent
++;
1812 /* We're not sure whether the indexes overlap. For the moment,
1813 conservatively answer "don't know". */
1814 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1815 fprintf (dump_file
, "ziv test failed: difference is non-integer.\n");
1817 *overlaps_a
= conflict_fn_not_known ();
1818 *overlaps_b
= conflict_fn_not_known ();
1819 *last_conflicts
= chrec_dont_know
;
1820 dependence_stats
.num_ziv_unimplemented
++;
1824 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1825 fprintf (dump_file
, ")\n");
1828 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1829 and only if it fits to the int type. If this is not the case, or the
1830 bound on the number of iterations of LOOP could not be derived, returns
1834 max_stmt_executions_tree (struct loop
*loop
)
1838 if (!max_stmt_executions (loop
, &nit
))
1839 return chrec_dont_know
;
1841 if (!wi::fits_to_tree_p (nit
, unsigned_type_node
))
1842 return chrec_dont_know
;
1844 return wide_int_to_tree (unsigned_type_node
, nit
);
1847 /* Determine whether the CHREC is always positive/negative. If the expression
1848 cannot be statically analyzed, return false, otherwise set the answer into
1852 chrec_is_positive (tree chrec
, bool *value
)
1854 bool value0
, value1
, value2
;
1855 tree end_value
, nb_iter
;
1857 switch (TREE_CODE (chrec
))
1859 case POLYNOMIAL_CHREC
:
1860 if (!chrec_is_positive (CHREC_LEFT (chrec
), &value0
)
1861 || !chrec_is_positive (CHREC_RIGHT (chrec
), &value1
))
1864 /* FIXME -- overflows. */
1865 if (value0
== value1
)
1871 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1872 and the proof consists in showing that the sign never
1873 changes during the execution of the loop, from 0 to
1874 loop->nb_iterations. */
1875 if (!evolution_function_is_affine_p (chrec
))
1878 nb_iter
= number_of_latch_executions (get_chrec_loop (chrec
));
1879 if (chrec_contains_undetermined (nb_iter
))
1883 /* TODO -- If the test is after the exit, we may decrease the number of
1884 iterations by one. */
1886 nb_iter
= chrec_fold_minus (type
, nb_iter
, build_int_cst (type
, 1));
1889 end_value
= chrec_apply (CHREC_VARIABLE (chrec
), chrec
, nb_iter
);
1891 if (!chrec_is_positive (end_value
, &value2
))
1895 return value0
== value1
;
1898 switch (tree_int_cst_sgn (chrec
))
1917 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1918 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1919 *OVERLAPS_B are initialized to the functions that describe the
1920 relation between the elements accessed twice by CHREC_A and
1921 CHREC_B. For k >= 0, the following property is verified:
1923 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1926 analyze_siv_subscript_cst_affine (tree chrec_a
,
1928 conflict_function
**overlaps_a
,
1929 conflict_function
**overlaps_b
,
1930 tree
*last_conflicts
)
1932 bool value0
, value1
, value2
;
1933 tree type
, difference
, tmp
;
1935 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
1936 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
1937 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
1938 difference
= chrec_fold_minus (type
, initial_condition (chrec_b
), chrec_a
);
1940 /* Special case overlap in the first iteration. */
1941 if (integer_zerop (difference
))
1943 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1944 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1945 *last_conflicts
= integer_one_node
;
1949 if (!chrec_is_positive (initial_condition (difference
), &value0
))
1951 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1952 fprintf (dump_file
, "siv test failed: chrec is not positive.\n");
1954 dependence_stats
.num_siv_unimplemented
++;
1955 *overlaps_a
= conflict_fn_not_known ();
1956 *overlaps_b
= conflict_fn_not_known ();
1957 *last_conflicts
= chrec_dont_know
;
1962 if (value0
== false)
1964 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value1
))
1966 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1967 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
1969 *overlaps_a
= conflict_fn_not_known ();
1970 *overlaps_b
= conflict_fn_not_known ();
1971 *last_conflicts
= chrec_dont_know
;
1972 dependence_stats
.num_siv_unimplemented
++;
1981 chrec_b = {10, +, 1}
1984 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
1986 HOST_WIDE_INT numiter
;
1987 struct loop
*loop
= get_chrec_loop (chrec_b
);
1989 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
1990 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
,
1991 fold_build1 (ABS_EXPR
, type
, difference
),
1992 CHREC_RIGHT (chrec_b
));
1993 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
1994 *last_conflicts
= integer_one_node
;
1997 /* Perform weak-zero siv test to see if overlap is
1998 outside the loop bounds. */
1999 numiter
= max_stmt_executions_int (loop
);
2002 && compare_tree_int (tmp
, numiter
) > 0)
2004 free_conflict_function (*overlaps_a
);
2005 free_conflict_function (*overlaps_b
);
2006 *overlaps_a
= conflict_fn_no_dependence ();
2007 *overlaps_b
= conflict_fn_no_dependence ();
2008 *last_conflicts
= integer_zero_node
;
2009 dependence_stats
.num_siv_independent
++;
2012 dependence_stats
.num_siv_dependent
++;
2016 /* When the step does not divide the difference, there are
2020 *overlaps_a
= conflict_fn_no_dependence ();
2021 *overlaps_b
= conflict_fn_no_dependence ();
2022 *last_conflicts
= integer_zero_node
;
2023 dependence_stats
.num_siv_independent
++;
2032 chrec_b = {10, +, -1}
2034 In this case, chrec_a will not overlap with chrec_b. */
2035 *overlaps_a
= conflict_fn_no_dependence ();
2036 *overlaps_b
= conflict_fn_no_dependence ();
2037 *last_conflicts
= integer_zero_node
;
2038 dependence_stats
.num_siv_independent
++;
2045 if (!chrec_is_positive (CHREC_RIGHT (chrec_b
), &value2
))
2047 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2048 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
2050 *overlaps_a
= conflict_fn_not_known ();
2051 *overlaps_b
= conflict_fn_not_known ();
2052 *last_conflicts
= chrec_dont_know
;
2053 dependence_stats
.num_siv_unimplemented
++;
2058 if (value2
== false)
2062 chrec_b = {10, +, -1}
2064 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
2066 HOST_WIDE_INT numiter
;
2067 struct loop
*loop
= get_chrec_loop (chrec_b
);
2069 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2070 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
, difference
,
2071 CHREC_RIGHT (chrec_b
));
2072 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
2073 *last_conflicts
= integer_one_node
;
2075 /* Perform weak-zero siv test to see if overlap is
2076 outside the loop bounds. */
2077 numiter
= max_stmt_executions_int (loop
);
2080 && compare_tree_int (tmp
, numiter
) > 0)
2082 free_conflict_function (*overlaps_a
);
2083 free_conflict_function (*overlaps_b
);
2084 *overlaps_a
= conflict_fn_no_dependence ();
2085 *overlaps_b
= conflict_fn_no_dependence ();
2086 *last_conflicts
= integer_zero_node
;
2087 dependence_stats
.num_siv_independent
++;
2090 dependence_stats
.num_siv_dependent
++;
2094 /* When the step does not divide the difference, there
2098 *overlaps_a
= conflict_fn_no_dependence ();
2099 *overlaps_b
= conflict_fn_no_dependence ();
2100 *last_conflicts
= integer_zero_node
;
2101 dependence_stats
.num_siv_independent
++;
2111 In this case, chrec_a will not overlap with chrec_b. */
2112 *overlaps_a
= conflict_fn_no_dependence ();
2113 *overlaps_b
= conflict_fn_no_dependence ();
2114 *last_conflicts
= integer_zero_node
;
2115 dependence_stats
.num_siv_independent
++;
2123 /* Helper recursive function for initializing the matrix A. Returns
2124 the initial value of CHREC. */
2127 initialize_matrix_A (lambda_matrix A
, tree chrec
, unsigned index
, int mult
)
2131 switch (TREE_CODE (chrec
))
2133 case POLYNOMIAL_CHREC
:
2134 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec
)) == INTEGER_CST
);
2136 A
[index
][0] = mult
* int_cst_value (CHREC_RIGHT (chrec
));
2137 return initialize_matrix_A (A
, CHREC_LEFT (chrec
), index
+ 1, mult
);
2143 tree op0
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2144 tree op1
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 1), index
, mult
);
2146 return chrec_fold_op (TREE_CODE (chrec
), chrec_type (chrec
), op0
, op1
);
2151 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2152 return chrec_convert (chrec_type (chrec
), op
, NULL
);
2157 /* Handle ~X as -1 - X. */
2158 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
2159 return chrec_fold_op (MINUS_EXPR
, chrec_type (chrec
),
2160 build_int_cst (TREE_TYPE (chrec
), -1), op
);
2172 #define FLOOR_DIV(x,y) ((x) / (y))
2174 /* Solves the special case of the Diophantine equation:
2175 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2177 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
2178 number of iterations that loops X and Y run. The overlaps will be
2179 constructed as evolutions in dimension DIM. */
2182 compute_overlap_steps_for_affine_univar (int niter
, int step_a
, int step_b
,
2183 affine_fn
*overlaps_a
,
2184 affine_fn
*overlaps_b
,
2185 tree
*last_conflicts
, int dim
)
2187 if (((step_a
> 0 && step_b
> 0)
2188 || (step_a
< 0 && step_b
< 0)))
2190 int step_overlaps_a
, step_overlaps_b
;
2191 int gcd_steps_a_b
, last_conflict
, tau2
;
2193 gcd_steps_a_b
= gcd (step_a
, step_b
);
2194 step_overlaps_a
= step_b
/ gcd_steps_a_b
;
2195 step_overlaps_b
= step_a
/ gcd_steps_a_b
;
2199 tau2
= FLOOR_DIV (niter
, step_overlaps_a
);
2200 tau2
= MIN (tau2
, FLOOR_DIV (niter
, step_overlaps_b
));
2201 last_conflict
= tau2
;
2202 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2205 *last_conflicts
= chrec_dont_know
;
2207 *overlaps_a
= affine_fn_univar (integer_zero_node
, dim
,
2208 build_int_cst (NULL_TREE
,
2210 *overlaps_b
= affine_fn_univar (integer_zero_node
, dim
,
2211 build_int_cst (NULL_TREE
,
2217 *overlaps_a
= affine_fn_cst (integer_zero_node
);
2218 *overlaps_b
= affine_fn_cst (integer_zero_node
);
2219 *last_conflicts
= integer_zero_node
;
2223 /* Solves the special case of a Diophantine equation where CHREC_A is
2224 an affine bivariate function, and CHREC_B is an affine univariate
2225 function. For example,
2227 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2229 has the following overlapping functions:
2231 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2232 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2233 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2235 FORNOW: This is a specialized implementation for a case occurring in
2236 a common benchmark. Implement the general algorithm. */
2239 compute_overlap_steps_for_affine_1_2 (tree chrec_a
, tree chrec_b
,
2240 conflict_function
**overlaps_a
,
2241 conflict_function
**overlaps_b
,
2242 tree
*last_conflicts
)
2244 bool xz_p
, yz_p
, xyz_p
;
2245 int step_x
, step_y
, step_z
;
2246 HOST_WIDE_INT niter_x
, niter_y
, niter_z
, niter
;
2247 affine_fn overlaps_a_xz
, overlaps_b_xz
;
2248 affine_fn overlaps_a_yz
, overlaps_b_yz
;
2249 affine_fn overlaps_a_xyz
, overlaps_b_xyz
;
2250 affine_fn ova1
, ova2
, ovb
;
2251 tree last_conflicts_xz
, last_conflicts_yz
, last_conflicts_xyz
;
2253 step_x
= int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a
)));
2254 step_y
= int_cst_value (CHREC_RIGHT (chrec_a
));
2255 step_z
= int_cst_value (CHREC_RIGHT (chrec_b
));
2257 niter_x
= max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a
)));
2258 niter_y
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
2259 niter_z
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
2261 if (niter_x
< 0 || niter_y
< 0 || niter_z
< 0)
2263 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2264 fprintf (dump_file
, "overlap steps test failed: no iteration counts.\n");
2266 *overlaps_a
= conflict_fn_not_known ();
2267 *overlaps_b
= conflict_fn_not_known ();
2268 *last_conflicts
= chrec_dont_know
;
2272 niter
= MIN (niter_x
, niter_z
);
2273 compute_overlap_steps_for_affine_univar (niter
, step_x
, step_z
,
2276 &last_conflicts_xz
, 1);
2277 niter
= MIN (niter_y
, niter_z
);
2278 compute_overlap_steps_for_affine_univar (niter
, step_y
, step_z
,
2281 &last_conflicts_yz
, 2);
2282 niter
= MIN (niter_x
, niter_z
);
2283 niter
= MIN (niter_y
, niter
);
2284 compute_overlap_steps_for_affine_univar (niter
, step_x
+ step_y
, step_z
,
2287 &last_conflicts_xyz
, 3);
2289 xz_p
= !integer_zerop (last_conflicts_xz
);
2290 yz_p
= !integer_zerop (last_conflicts_yz
);
2291 xyz_p
= !integer_zerop (last_conflicts_xyz
);
2293 if (xz_p
|| yz_p
|| xyz_p
)
2295 ova1
= affine_fn_cst (integer_zero_node
);
2296 ova2
= affine_fn_cst (integer_zero_node
);
2297 ovb
= affine_fn_cst (integer_zero_node
);
2300 affine_fn t0
= ova1
;
2303 ova1
= affine_fn_plus (ova1
, overlaps_a_xz
);
2304 ovb
= affine_fn_plus (ovb
, overlaps_b_xz
);
2305 affine_fn_free (t0
);
2306 affine_fn_free (t2
);
2307 *last_conflicts
= last_conflicts_xz
;
2311 affine_fn t0
= ova2
;
2314 ova2
= affine_fn_plus (ova2
, overlaps_a_yz
);
2315 ovb
= affine_fn_plus (ovb
, overlaps_b_yz
);
2316 affine_fn_free (t0
);
2317 affine_fn_free (t2
);
2318 *last_conflicts
= last_conflicts_yz
;
2322 affine_fn t0
= ova1
;
2323 affine_fn t2
= ova2
;
2326 ova1
= affine_fn_plus (ova1
, overlaps_a_xyz
);
2327 ova2
= affine_fn_plus (ova2
, overlaps_a_xyz
);
2328 ovb
= affine_fn_plus (ovb
, overlaps_b_xyz
);
2329 affine_fn_free (t0
);
2330 affine_fn_free (t2
);
2331 affine_fn_free (t4
);
2332 *last_conflicts
= last_conflicts_xyz
;
2334 *overlaps_a
= conflict_fn (2, ova1
, ova2
);
2335 *overlaps_b
= conflict_fn (1, ovb
);
2339 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2340 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2341 *last_conflicts
= integer_zero_node
;
2344 affine_fn_free (overlaps_a_xz
);
2345 affine_fn_free (overlaps_b_xz
);
2346 affine_fn_free (overlaps_a_yz
);
2347 affine_fn_free (overlaps_b_yz
);
2348 affine_fn_free (overlaps_a_xyz
);
2349 affine_fn_free (overlaps_b_xyz
);
2352 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2355 lambda_vector_copy (lambda_vector vec1
, lambda_vector vec2
,
2358 memcpy (vec2
, vec1
, size
* sizeof (*vec1
));
2361 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2364 lambda_matrix_copy (lambda_matrix mat1
, lambda_matrix mat2
,
2369 for (i
= 0; i
< m
; i
++)
2370 lambda_vector_copy (mat1
[i
], mat2
[i
], n
);
2373 /* Store the N x N identity matrix in MAT. */
2376 lambda_matrix_id (lambda_matrix mat
, int size
)
2380 for (i
= 0; i
< size
; i
++)
2381 for (j
= 0; j
< size
; j
++)
2382 mat
[i
][j
] = (i
== j
) ? 1 : 0;
2385 /* Return the first nonzero element of vector VEC1 between START and N.
2386 We must have START <= N. Returns N if VEC1 is the zero vector. */
2389 lambda_vector_first_nz (lambda_vector vec1
, int n
, int start
)
2392 while (j
< n
&& vec1
[j
] == 0)
2397 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2398 R2 = R2 + CONST1 * R1. */
2401 lambda_matrix_row_add (lambda_matrix mat
, int n
, int r1
, int r2
, int const1
)
2408 for (i
= 0; i
< n
; i
++)
2409 mat
[r2
][i
] += const1
* mat
[r1
][i
];
2412 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2413 and store the result in VEC2. */
2416 lambda_vector_mult_const (lambda_vector vec1
, lambda_vector vec2
,
2417 int size
, int const1
)
2422 lambda_vector_clear (vec2
, size
);
2424 for (i
= 0; i
< size
; i
++)
2425 vec2
[i
] = const1
* vec1
[i
];
2428 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2431 lambda_vector_negate (lambda_vector vec1
, lambda_vector vec2
,
2434 lambda_vector_mult_const (vec1
, vec2
, size
, -1);
2437 /* Negate row R1 of matrix MAT which has N columns. */
2440 lambda_matrix_row_negate (lambda_matrix mat
, int n
, int r1
)
2442 lambda_vector_negate (mat
[r1
], mat
[r1
], n
);
2445 /* Return true if two vectors are equal. */
2448 lambda_vector_equal (lambda_vector vec1
, lambda_vector vec2
, int size
)
2451 for (i
= 0; i
< size
; i
++)
2452 if (vec1
[i
] != vec2
[i
])
2457 /* Given an M x N integer matrix A, this function determines an M x
2458 M unimodular matrix U, and an M x N echelon matrix S such that
2459 "U.A = S". This decomposition is also known as "right Hermite".
2461 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2462 Restructuring Compilers" Utpal Banerjee. */
2465 lambda_matrix_right_hermite (lambda_matrix A
, int m
, int n
,
2466 lambda_matrix S
, lambda_matrix U
)
2470 lambda_matrix_copy (A
, S
, m
, n
);
2471 lambda_matrix_id (U
, m
);
2473 for (j
= 0; j
< n
; j
++)
2475 if (lambda_vector_first_nz (S
[j
], m
, i0
) < m
)
2478 for (i
= m
- 1; i
>= i0
; i
--)
2480 while (S
[i
][j
] != 0)
2482 int sigma
, factor
, a
, b
;
2486 sigma
= (a
* b
< 0) ? -1: 1;
2489 factor
= sigma
* (a
/ b
);
2491 lambda_matrix_row_add (S
, n
, i
, i
-1, -factor
);
2492 std::swap (S
[i
], S
[i
-1]);
2494 lambda_matrix_row_add (U
, m
, i
, i
-1, -factor
);
2495 std::swap (U
[i
], U
[i
-1]);
2502 /* Determines the overlapping elements due to accesses CHREC_A and
2503 CHREC_B, that are affine functions. This function cannot handle
2504 symbolic evolution functions, ie. when initial conditions are
2505 parameters, because it uses lambda matrices of integers. */
2508 analyze_subscript_affine_affine (tree chrec_a
,
2510 conflict_function
**overlaps_a
,
2511 conflict_function
**overlaps_b
,
2512 tree
*last_conflicts
)
2514 unsigned nb_vars_a
, nb_vars_b
, dim
;
2515 HOST_WIDE_INT init_a
, init_b
, gamma
, gcd_alpha_beta
;
2516 lambda_matrix A
, U
, S
;
2517 struct obstack scratch_obstack
;
2519 if (eq_evolutions_p (chrec_a
, chrec_b
))
2521 /* The accessed index overlaps for each iteration in the
2523 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2524 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2525 *last_conflicts
= chrec_dont_know
;
2528 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2529 fprintf (dump_file
, "(analyze_subscript_affine_affine \n");
2531 /* For determining the initial intersection, we have to solve a
2532 Diophantine equation. This is the most time consuming part.
2534 For answering to the question: "Is there a dependence?" we have
2535 to prove that there exists a solution to the Diophantine
2536 equation, and that the solution is in the iteration domain,
2537 i.e. the solution is positive or zero, and that the solution
2538 happens before the upper bound loop.nb_iterations. Otherwise
2539 there is no dependence. This function outputs a description of
2540 the iterations that hold the intersections. */
2542 nb_vars_a
= nb_vars_in_chrec (chrec_a
);
2543 nb_vars_b
= nb_vars_in_chrec (chrec_b
);
2545 gcc_obstack_init (&scratch_obstack
);
2547 dim
= nb_vars_a
+ nb_vars_b
;
2548 U
= lambda_matrix_new (dim
, dim
, &scratch_obstack
);
2549 A
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2550 S
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
2552 init_a
= int_cst_value (initialize_matrix_A (A
, chrec_a
, 0, 1));
2553 init_b
= int_cst_value (initialize_matrix_A (A
, chrec_b
, nb_vars_a
, -1));
2554 gamma
= init_b
- init_a
;
2556 /* Don't do all the hard work of solving the Diophantine equation
2557 when we already know the solution: for example,
2560 | gamma = 3 - 3 = 0.
2561 Then the first overlap occurs during the first iterations:
2562 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2566 if (nb_vars_a
== 1 && nb_vars_b
== 1)
2568 HOST_WIDE_INT step_a
, step_b
;
2569 HOST_WIDE_INT niter
, niter_a
, niter_b
;
2572 niter_a
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
2573 niter_b
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
2574 niter
= MIN (niter_a
, niter_b
);
2575 step_a
= int_cst_value (CHREC_RIGHT (chrec_a
));
2576 step_b
= int_cst_value (CHREC_RIGHT (chrec_b
));
2578 compute_overlap_steps_for_affine_univar (niter
, step_a
, step_b
,
2581 *overlaps_a
= conflict_fn (1, ova
);
2582 *overlaps_b
= conflict_fn (1, ovb
);
2585 else if (nb_vars_a
== 2 && nb_vars_b
== 1)
2586 compute_overlap_steps_for_affine_1_2
2587 (chrec_a
, chrec_b
, overlaps_a
, overlaps_b
, last_conflicts
);
2589 else if (nb_vars_a
== 1 && nb_vars_b
== 2)
2590 compute_overlap_steps_for_affine_1_2
2591 (chrec_b
, chrec_a
, overlaps_b
, overlaps_a
, last_conflicts
);
2595 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2596 fprintf (dump_file
, "affine-affine test failed: too many variables.\n");
2597 *overlaps_a
= conflict_fn_not_known ();
2598 *overlaps_b
= conflict_fn_not_known ();
2599 *last_conflicts
= chrec_dont_know
;
2601 goto end_analyze_subs_aa
;
2605 lambda_matrix_right_hermite (A
, dim
, 1, S
, U
);
2610 lambda_matrix_row_negate (U
, dim
, 0);
2612 gcd_alpha_beta
= S
[0][0];
2614 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2615 but that is a quite strange case. Instead of ICEing, answer
2617 if (gcd_alpha_beta
== 0)
2619 *overlaps_a
= conflict_fn_not_known ();
2620 *overlaps_b
= conflict_fn_not_known ();
2621 *last_conflicts
= chrec_dont_know
;
2622 goto end_analyze_subs_aa
;
2625 /* The classic "gcd-test". */
2626 if (!int_divides_p (gcd_alpha_beta
, gamma
))
2628 /* The "gcd-test" has determined that there is no integer
2629 solution, i.e. there is no dependence. */
2630 *overlaps_a
= conflict_fn_no_dependence ();
2631 *overlaps_b
= conflict_fn_no_dependence ();
2632 *last_conflicts
= integer_zero_node
;
2635 /* Both access functions are univariate. This includes SIV and MIV cases. */
2636 else if (nb_vars_a
== 1 && nb_vars_b
== 1)
2638 /* Both functions should have the same evolution sign. */
2639 if (((A
[0][0] > 0 && -A
[1][0] > 0)
2640 || (A
[0][0] < 0 && -A
[1][0] < 0)))
2642 /* The solutions are given by:
2644 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2647 For a given integer t. Using the following variables,
2649 | i0 = u11 * gamma / gcd_alpha_beta
2650 | j0 = u12 * gamma / gcd_alpha_beta
2657 | y0 = j0 + j1 * t. */
2658 HOST_WIDE_INT i0
, j0
, i1
, j1
;
2660 i0
= U
[0][0] * gamma
/ gcd_alpha_beta
;
2661 j0
= U
[0][1] * gamma
/ gcd_alpha_beta
;
2665 if ((i1
== 0 && i0
< 0)
2666 || (j1
== 0 && j0
< 0))
2668 /* There is no solution.
2669 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2670 falls in here, but for the moment we don't look at the
2671 upper bound of the iteration domain. */
2672 *overlaps_a
= conflict_fn_no_dependence ();
2673 *overlaps_b
= conflict_fn_no_dependence ();
2674 *last_conflicts
= integer_zero_node
;
2675 goto end_analyze_subs_aa
;
2678 if (i1
> 0 && j1
> 0)
2680 HOST_WIDE_INT niter_a
2681 = max_stmt_executions_int (get_chrec_loop (chrec_a
));
2682 HOST_WIDE_INT niter_b
2683 = max_stmt_executions_int (get_chrec_loop (chrec_b
));
2684 HOST_WIDE_INT niter
= MIN (niter_a
, niter_b
);
2686 /* (X0, Y0) is a solution of the Diophantine equation:
2687 "chrec_a (X0) = chrec_b (Y0)". */
2688 HOST_WIDE_INT tau1
= MAX (CEIL (-i0
, i1
),
2690 HOST_WIDE_INT x0
= i1
* tau1
+ i0
;
2691 HOST_WIDE_INT y0
= j1
* tau1
+ j0
;
2693 /* (X1, Y1) is the smallest positive solution of the eq
2694 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2695 first conflict occurs. */
2696 HOST_WIDE_INT min_multiple
= MIN (x0
/ i1
, y0
/ j1
);
2697 HOST_WIDE_INT x1
= x0
- i1
* min_multiple
;
2698 HOST_WIDE_INT y1
= y0
- j1
* min_multiple
;
2702 HOST_WIDE_INT tau2
= MIN (FLOOR_DIV (niter
- i0
, i1
),
2703 FLOOR_DIV (niter
- j0
, j1
));
2704 HOST_WIDE_INT last_conflict
= tau2
- (x1
- i0
)/i1
;
2706 /* If the overlap occurs outside of the bounds of the
2707 loop, there is no dependence. */
2708 if (x1
>= niter
|| y1
>= niter
)
2710 *overlaps_a
= conflict_fn_no_dependence ();
2711 *overlaps_b
= conflict_fn_no_dependence ();
2712 *last_conflicts
= integer_zero_node
;
2713 goto end_analyze_subs_aa
;
2716 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
2719 *last_conflicts
= chrec_dont_know
;
2723 affine_fn_univar (build_int_cst (NULL_TREE
, x1
),
2725 build_int_cst (NULL_TREE
, i1
)));
2728 affine_fn_univar (build_int_cst (NULL_TREE
, y1
),
2730 build_int_cst (NULL_TREE
, j1
)));
2734 /* FIXME: For the moment, the upper bound of the
2735 iteration domain for i and j is not checked. */
2736 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2737 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2738 *overlaps_a
= conflict_fn_not_known ();
2739 *overlaps_b
= conflict_fn_not_known ();
2740 *last_conflicts
= chrec_dont_know
;
2745 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2746 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2747 *overlaps_a
= conflict_fn_not_known ();
2748 *overlaps_b
= conflict_fn_not_known ();
2749 *last_conflicts
= chrec_dont_know
;
2754 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2755 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
2756 *overlaps_a
= conflict_fn_not_known ();
2757 *overlaps_b
= conflict_fn_not_known ();
2758 *last_conflicts
= chrec_dont_know
;
2761 end_analyze_subs_aa
:
2762 obstack_free (&scratch_obstack
, NULL
);
2763 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2765 fprintf (dump_file
, " (overlaps_a = ");
2766 dump_conflict_function (dump_file
, *overlaps_a
);
2767 fprintf (dump_file
, ")\n (overlaps_b = ");
2768 dump_conflict_function (dump_file
, *overlaps_b
);
2769 fprintf (dump_file
, "))\n");
2773 /* Returns true when analyze_subscript_affine_affine can be used for
2774 determining the dependence relation between chrec_a and chrec_b,
2775 that contain symbols. This function modifies chrec_a and chrec_b
2776 such that the analysis result is the same, and such that they don't
2777 contain symbols, and then can safely be passed to the analyzer.
2779 Example: The analysis of the following tuples of evolutions produce
2780 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2783 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2784 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2788 can_use_analyze_subscript_affine_affine (tree
*chrec_a
, tree
*chrec_b
)
2790 tree diff
, type
, left_a
, left_b
, right_b
;
2792 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a
))
2793 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b
)))
2794 /* FIXME: For the moment not handled. Might be refined later. */
2797 type
= chrec_type (*chrec_a
);
2798 left_a
= CHREC_LEFT (*chrec_a
);
2799 left_b
= chrec_convert (type
, CHREC_LEFT (*chrec_b
), NULL
);
2800 diff
= chrec_fold_minus (type
, left_a
, left_b
);
2802 if (!evolution_function_is_constant_p (diff
))
2805 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2806 fprintf (dump_file
, "can_use_subscript_aff_aff_for_symbolic \n");
2808 *chrec_a
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_a
),
2809 diff
, CHREC_RIGHT (*chrec_a
));
2810 right_b
= chrec_convert (type
, CHREC_RIGHT (*chrec_b
), NULL
);
2811 *chrec_b
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_b
),
2812 build_int_cst (type
, 0),
2817 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2818 *OVERLAPS_B are initialized to the functions that describe the
2819 relation between the elements accessed twice by CHREC_A and
2820 CHREC_B. For k >= 0, the following property is verified:
2822 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2825 analyze_siv_subscript (tree chrec_a
,
2827 conflict_function
**overlaps_a
,
2828 conflict_function
**overlaps_b
,
2829 tree
*last_conflicts
,
2832 dependence_stats
.num_siv
++;
2834 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2835 fprintf (dump_file
, "(analyze_siv_subscript \n");
2837 if (evolution_function_is_constant_p (chrec_a
)
2838 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2839 analyze_siv_subscript_cst_affine (chrec_a
, chrec_b
,
2840 overlaps_a
, overlaps_b
, last_conflicts
);
2842 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2843 && evolution_function_is_constant_p (chrec_b
))
2844 analyze_siv_subscript_cst_affine (chrec_b
, chrec_a
,
2845 overlaps_b
, overlaps_a
, last_conflicts
);
2847 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
2848 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
2850 if (!chrec_contains_symbols (chrec_a
)
2851 && !chrec_contains_symbols (chrec_b
))
2853 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2854 overlaps_a
, overlaps_b
,
2857 if (CF_NOT_KNOWN_P (*overlaps_a
)
2858 || CF_NOT_KNOWN_P (*overlaps_b
))
2859 dependence_stats
.num_siv_unimplemented
++;
2860 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2861 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2862 dependence_stats
.num_siv_independent
++;
2864 dependence_stats
.num_siv_dependent
++;
2866 else if (can_use_analyze_subscript_affine_affine (&chrec_a
,
2869 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
2870 overlaps_a
, overlaps_b
,
2873 if (CF_NOT_KNOWN_P (*overlaps_a
)
2874 || CF_NOT_KNOWN_P (*overlaps_b
))
2875 dependence_stats
.num_siv_unimplemented
++;
2876 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
2877 || CF_NO_DEPENDENCE_P (*overlaps_b
))
2878 dependence_stats
.num_siv_independent
++;
2880 dependence_stats
.num_siv_dependent
++;
2883 goto siv_subscript_dontknow
;
2888 siv_subscript_dontknow
:;
2889 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2890 fprintf (dump_file
, " siv test failed: unimplemented");
2891 *overlaps_a
= conflict_fn_not_known ();
2892 *overlaps_b
= conflict_fn_not_known ();
2893 *last_conflicts
= chrec_dont_know
;
2894 dependence_stats
.num_siv_unimplemented
++;
2897 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2898 fprintf (dump_file
, ")\n");
2901 /* Returns false if we can prove that the greatest common divisor of the steps
2902 of CHREC does not divide CST, false otherwise. */
2905 gcd_of_steps_may_divide_p (const_tree chrec
, const_tree cst
)
2907 HOST_WIDE_INT cd
= 0, val
;
2910 if (!tree_fits_shwi_p (cst
))
2912 val
= tree_to_shwi (cst
);
2914 while (TREE_CODE (chrec
) == POLYNOMIAL_CHREC
)
2916 step
= CHREC_RIGHT (chrec
);
2917 if (!tree_fits_shwi_p (step
))
2919 cd
= gcd (cd
, tree_to_shwi (step
));
2920 chrec
= CHREC_LEFT (chrec
);
2923 return val
% cd
== 0;
2926 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2927 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2928 functions that describe the relation between the elements accessed
2929 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2932 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2935 analyze_miv_subscript (tree chrec_a
,
2937 conflict_function
**overlaps_a
,
2938 conflict_function
**overlaps_b
,
2939 tree
*last_conflicts
,
2940 struct loop
*loop_nest
)
2942 tree type
, difference
;
2944 dependence_stats
.num_miv
++;
2945 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2946 fprintf (dump_file
, "(analyze_miv_subscript \n");
2948 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
2949 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
2950 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
2951 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
2953 if (eq_evolutions_p (chrec_a
, chrec_b
))
2955 /* Access functions are the same: all the elements are accessed
2956 in the same order. */
2957 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2958 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2959 *last_conflicts
= max_stmt_executions_tree (get_chrec_loop (chrec_a
));
2960 dependence_stats
.num_miv_dependent
++;
2963 else if (evolution_function_is_constant_p (difference
)
2964 /* For the moment, the following is verified:
2965 evolution_function_is_affine_multivariate_p (chrec_a,
2967 && !gcd_of_steps_may_divide_p (chrec_a
, difference
))
2969 /* testsuite/.../ssa-chrec-33.c
2970 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2972 The difference is 1, and all the evolution steps are multiples
2973 of 2, consequently there are no overlapping elements. */
2974 *overlaps_a
= conflict_fn_no_dependence ();
2975 *overlaps_b
= conflict_fn_no_dependence ();
2976 *last_conflicts
= integer_zero_node
;
2977 dependence_stats
.num_miv_independent
++;
2980 else if (evolution_function_is_affine_multivariate_p (chrec_a
, loop_nest
->num
)
2981 && !chrec_contains_symbols (chrec_a
)
2982 && evolution_function_is_affine_multivariate_p (chrec_b
, loop_nest
->num
)
2983 && !chrec_contains_symbols (chrec_b
))
2985 /* testsuite/.../ssa-chrec-35.c
2986 {0, +, 1}_2 vs. {0, +, 1}_3
2987 the overlapping elements are respectively located at iterations:
2988 {0, +, 1}_x and {0, +, 1}_x,
2989 in other words, we have the equality:
2990 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2993 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2994 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2996 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2997 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2999 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
3000 overlaps_a
, overlaps_b
, last_conflicts
);
3002 if (CF_NOT_KNOWN_P (*overlaps_a
)
3003 || CF_NOT_KNOWN_P (*overlaps_b
))
3004 dependence_stats
.num_miv_unimplemented
++;
3005 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
3006 || CF_NO_DEPENDENCE_P (*overlaps_b
))
3007 dependence_stats
.num_miv_independent
++;
3009 dependence_stats
.num_miv_dependent
++;
3014 /* When the analysis is too difficult, answer "don't know". */
3015 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3016 fprintf (dump_file
, "analyze_miv_subscript test failed: unimplemented.\n");
3018 *overlaps_a
= conflict_fn_not_known ();
3019 *overlaps_b
= conflict_fn_not_known ();
3020 *last_conflicts
= chrec_dont_know
;
3021 dependence_stats
.num_miv_unimplemented
++;
3024 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3025 fprintf (dump_file
, ")\n");
3028 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
3029 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
3030 OVERLAP_ITERATIONS_B are initialized with two functions that
3031 describe the iterations that contain conflicting elements.
3033 Remark: For an integer k >= 0, the following equality is true:
3035 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3039 analyze_overlapping_iterations (tree chrec_a
,
3041 conflict_function
**overlap_iterations_a
,
3042 conflict_function
**overlap_iterations_b
,
3043 tree
*last_conflicts
, struct loop
*loop_nest
)
3045 unsigned int lnn
= loop_nest
->num
;
3047 dependence_stats
.num_subscript_tests
++;
3049 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3051 fprintf (dump_file
, "(analyze_overlapping_iterations \n");
3052 fprintf (dump_file
, " (chrec_a = ");
3053 print_generic_expr (dump_file
, chrec_a
, 0);
3054 fprintf (dump_file
, ")\n (chrec_b = ");
3055 print_generic_expr (dump_file
, chrec_b
, 0);
3056 fprintf (dump_file
, ")\n");
3059 if (chrec_a
== NULL_TREE
3060 || chrec_b
== NULL_TREE
3061 || chrec_contains_undetermined (chrec_a
)
3062 || chrec_contains_undetermined (chrec_b
))
3064 dependence_stats
.num_subscript_undetermined
++;
3066 *overlap_iterations_a
= conflict_fn_not_known ();
3067 *overlap_iterations_b
= conflict_fn_not_known ();
3070 /* If they are the same chrec, and are affine, they overlap
3071 on every iteration. */
3072 else if (eq_evolutions_p (chrec_a
, chrec_b
)
3073 && (evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
3074 || operand_equal_p (chrec_a
, chrec_b
, 0)))
3076 dependence_stats
.num_same_subscript_function
++;
3077 *overlap_iterations_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3078 *overlap_iterations_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3079 *last_conflicts
= chrec_dont_know
;
3082 /* If they aren't the same, and aren't affine, we can't do anything
3084 else if ((chrec_contains_symbols (chrec_a
)
3085 || chrec_contains_symbols (chrec_b
))
3086 && (!evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
3087 || !evolution_function_is_affine_multivariate_p (chrec_b
, lnn
)))
3089 dependence_stats
.num_subscript_undetermined
++;
3090 *overlap_iterations_a
= conflict_fn_not_known ();
3091 *overlap_iterations_b
= conflict_fn_not_known ();
3094 else if (ziv_subscript_p (chrec_a
, chrec_b
))
3095 analyze_ziv_subscript (chrec_a
, chrec_b
,
3096 overlap_iterations_a
, overlap_iterations_b
,
3099 else if (siv_subscript_p (chrec_a
, chrec_b
))
3100 analyze_siv_subscript (chrec_a
, chrec_b
,
3101 overlap_iterations_a
, overlap_iterations_b
,
3102 last_conflicts
, lnn
);
3105 analyze_miv_subscript (chrec_a
, chrec_b
,
3106 overlap_iterations_a
, overlap_iterations_b
,
3107 last_conflicts
, loop_nest
);
3109 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3111 fprintf (dump_file
, " (overlap_iterations_a = ");
3112 dump_conflict_function (dump_file
, *overlap_iterations_a
);
3113 fprintf (dump_file
, ")\n (overlap_iterations_b = ");
3114 dump_conflict_function (dump_file
, *overlap_iterations_b
);
3115 fprintf (dump_file
, "))\n");
3119 /* Helper function for uniquely inserting distance vectors. */
3122 save_dist_v (struct data_dependence_relation
*ddr
, lambda_vector dist_v
)
3127 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, v
)
3128 if (lambda_vector_equal (v
, dist_v
, DDR_NB_LOOPS (ddr
)))
3131 DDR_DIST_VECTS (ddr
).safe_push (dist_v
);
3134 /* Helper function for uniquely inserting direction vectors. */
3137 save_dir_v (struct data_dependence_relation
*ddr
, lambda_vector dir_v
)
3142 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), i
, v
)
3143 if (lambda_vector_equal (v
, dir_v
, DDR_NB_LOOPS (ddr
)))
3146 DDR_DIR_VECTS (ddr
).safe_push (dir_v
);
3149 /* Add a distance of 1 on all the loops outer than INDEX. If we
3150 haven't yet determined a distance for this outer loop, push a new
3151 distance vector composed of the previous distance, and a distance
3152 of 1 for this outer loop. Example:
3160 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
3161 save (0, 1), then we have to save (1, 0). */
3164 add_outer_distances (struct data_dependence_relation
*ddr
,
3165 lambda_vector dist_v
, int index
)
3167 /* For each outer loop where init_v is not set, the accesses are
3168 in dependence of distance 1 in the loop. */
3169 while (--index
>= 0)
3171 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3172 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3174 save_dist_v (ddr
, save_v
);
3178 /* Return false when fail to represent the data dependence as a
3179 distance vector. INIT_B is set to true when a component has been
3180 added to the distance vector DIST_V. INDEX_CARRY is then set to
3181 the index in DIST_V that carries the dependence. */
3184 build_classic_dist_vector_1 (struct data_dependence_relation
*ddr
,
3185 struct data_reference
*ddr_a
,
3186 struct data_reference
*ddr_b
,
3187 lambda_vector dist_v
, bool *init_b
,
3191 lambda_vector init_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3193 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3195 tree access_fn_a
, access_fn_b
;
3196 struct subscript
*subscript
= DDR_SUBSCRIPT (ddr
, i
);
3198 if (chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
3200 non_affine_dependence_relation (ddr
);
3204 access_fn_a
= DR_ACCESS_FN (ddr_a
, i
);
3205 access_fn_b
= DR_ACCESS_FN (ddr_b
, i
);
3207 if (TREE_CODE (access_fn_a
) == POLYNOMIAL_CHREC
3208 && TREE_CODE (access_fn_b
) == POLYNOMIAL_CHREC
)
3211 int var_a
= CHREC_VARIABLE (access_fn_a
);
3212 int var_b
= CHREC_VARIABLE (access_fn_b
);
3215 || chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
3217 non_affine_dependence_relation (ddr
);
3221 dist
= int_cst_value (SUB_DISTANCE (subscript
));
3222 index
= index_in_loop_nest (var_a
, DDR_LOOP_NEST (ddr
));
3223 *index_carry
= MIN (index
, *index_carry
);
3225 /* This is the subscript coupling test. If we have already
3226 recorded a distance for this loop (a distance coming from
3227 another subscript), it should be the same. For example,
3228 in the following code, there is no dependence:
3235 if (init_v
[index
] != 0 && dist_v
[index
] != dist
)
3237 finalize_ddr_dependent (ddr
, chrec_known
);
3241 dist_v
[index
] = dist
;
3245 else if (!operand_equal_p (access_fn_a
, access_fn_b
, 0))
3247 /* This can be for example an affine vs. constant dependence
3248 (T[i] vs. T[3]) that is not an affine dependence and is
3249 not representable as a distance vector. */
3250 non_affine_dependence_relation (ddr
);
3258 /* Return true when the DDR contains only constant access functions. */
3261 constant_access_functions (const struct data_dependence_relation
*ddr
)
3265 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3266 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr
), i
))
3267 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr
), i
)))
3273 /* Helper function for the case where DDR_A and DDR_B are the same
3274 multivariate access function with a constant step. For an example
3278 add_multivariate_self_dist (struct data_dependence_relation
*ddr
, tree c_2
)
3281 tree c_1
= CHREC_LEFT (c_2
);
3282 tree c_0
= CHREC_LEFT (c_1
);
3283 lambda_vector dist_v
;
3286 /* Polynomials with more than 2 variables are not handled yet. When
3287 the evolution steps are parameters, it is not possible to
3288 represent the dependence using classical distance vectors. */
3289 if (TREE_CODE (c_0
) != INTEGER_CST
3290 || TREE_CODE (CHREC_RIGHT (c_1
)) != INTEGER_CST
3291 || TREE_CODE (CHREC_RIGHT (c_2
)) != INTEGER_CST
)
3293 DDR_AFFINE_P (ddr
) = false;
3297 x_2
= index_in_loop_nest (CHREC_VARIABLE (c_2
), DDR_LOOP_NEST (ddr
));
3298 x_1
= index_in_loop_nest (CHREC_VARIABLE (c_1
), DDR_LOOP_NEST (ddr
));
3300 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3301 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3302 v1
= int_cst_value (CHREC_RIGHT (c_1
));
3303 v2
= int_cst_value (CHREC_RIGHT (c_2
));
3316 save_dist_v (ddr
, dist_v
);
3318 add_outer_distances (ddr
, dist_v
, x_1
);
3321 /* Helper function for the case where DDR_A and DDR_B are the same
3322 access functions. */
3325 add_other_self_distances (struct data_dependence_relation
*ddr
)
3327 lambda_vector dist_v
;
3329 int index_carry
= DDR_NB_LOOPS (ddr
);
3331 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3333 tree access_fun
= DR_ACCESS_FN (DDR_A (ddr
), i
);
3335 if (TREE_CODE (access_fun
) == POLYNOMIAL_CHREC
)
3337 if (!evolution_function_is_univariate_p (access_fun
))
3339 if (DDR_NUM_SUBSCRIPTS (ddr
) != 1)
3341 DDR_ARE_DEPENDENT (ddr
) = chrec_dont_know
;
3345 access_fun
= DR_ACCESS_FN (DDR_A (ddr
), 0);
3347 if (TREE_CODE (CHREC_LEFT (access_fun
)) == POLYNOMIAL_CHREC
)
3348 add_multivariate_self_dist (ddr
, access_fun
);
3350 /* The evolution step is not constant: it varies in
3351 the outer loop, so this cannot be represented by a
3352 distance vector. For example in pr34635.c the
3353 evolution is {0, +, {0, +, 4}_1}_2. */
3354 DDR_AFFINE_P (ddr
) = false;
3359 index_carry
= MIN (index_carry
,
3360 index_in_loop_nest (CHREC_VARIABLE (access_fun
),
3361 DDR_LOOP_NEST (ddr
)));
3365 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3366 add_outer_distances (ddr
, dist_v
, index_carry
);
3370 insert_innermost_unit_dist_vector (struct data_dependence_relation
*ddr
)
3372 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3374 dist_v
[DDR_INNER_LOOP (ddr
)] = 1;
3375 save_dist_v (ddr
, dist_v
);
3378 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3379 is the case for example when access functions are the same and
3380 equal to a constant, as in:
3387 in which case the distance vectors are (0) and (1). */
3390 add_distance_for_zero_overlaps (struct data_dependence_relation
*ddr
)
3394 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3396 subscript_p sub
= DDR_SUBSCRIPT (ddr
, i
);
3397 conflict_function
*ca
= SUB_CONFLICTS_IN_A (sub
);
3398 conflict_function
*cb
= SUB_CONFLICTS_IN_B (sub
);
3400 for (j
= 0; j
< ca
->n
; j
++)
3401 if (affine_function_zero_p (ca
->fns
[j
]))
3403 insert_innermost_unit_dist_vector (ddr
);
3407 for (j
= 0; j
< cb
->n
; j
++)
3408 if (affine_function_zero_p (cb
->fns
[j
]))
3410 insert_innermost_unit_dist_vector (ddr
);
3416 /* Compute the classic per loop distance vector. DDR is the data
3417 dependence relation to build a vector from. Return false when fail
3418 to represent the data dependence as a distance vector. */
3421 build_classic_dist_vector (struct data_dependence_relation
*ddr
,
3422 struct loop
*loop_nest
)
3424 bool init_b
= false;
3425 int index_carry
= DDR_NB_LOOPS (ddr
);
3426 lambda_vector dist_v
;
3428 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
3431 if (same_access_functions (ddr
))
3433 /* Save the 0 vector. */
3434 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3435 save_dist_v (ddr
, dist_v
);
3437 if (constant_access_functions (ddr
))
3438 add_distance_for_zero_overlaps (ddr
);
3440 if (DDR_NB_LOOPS (ddr
) > 1)
3441 add_other_self_distances (ddr
);
3446 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3447 if (!build_classic_dist_vector_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
),
3448 dist_v
, &init_b
, &index_carry
))
3451 /* Save the distance vector if we initialized one. */
3454 /* Verify a basic constraint: classic distance vectors should
3455 always be lexicographically positive.
3457 Data references are collected in the order of execution of
3458 the program, thus for the following loop
3460 | for (i = 1; i < 100; i++)
3461 | for (j = 1; j < 100; j++)
3463 | t = T[j+1][i-1]; // A
3464 | T[j][i] = t + 2; // B
3467 references are collected following the direction of the wind:
3468 A then B. The data dependence tests are performed also
3469 following this order, such that we're looking at the distance
3470 separating the elements accessed by A from the elements later
3471 accessed by B. But in this example, the distance returned by
3472 test_dep (A, B) is lexicographically negative (-1, 1), that
3473 means that the access A occurs later than B with respect to
3474 the outer loop, ie. we're actually looking upwind. In this
3475 case we solve test_dep (B, A) looking downwind to the
3476 lexicographically positive solution, that returns the
3477 distance vector (1, -1). */
3478 if (!lambda_vector_lexico_pos (dist_v
, DDR_NB_LOOPS (ddr
)))
3480 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3481 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3484 compute_subscript_distance (ddr
);
3485 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3486 save_v
, &init_b
, &index_carry
))
3488 save_dist_v (ddr
, save_v
);
3489 DDR_REVERSED_P (ddr
) = true;
3491 /* In this case there is a dependence forward for all the
3494 | for (k = 1; k < 100; k++)
3495 | for (i = 1; i < 100; i++)
3496 | for (j = 1; j < 100; j++)
3498 | t = T[j+1][i-1]; // A
3499 | T[j][i] = t + 2; // B
3507 if (DDR_NB_LOOPS (ddr
) > 1)
3509 add_outer_distances (ddr
, save_v
, index_carry
);
3510 add_outer_distances (ddr
, dist_v
, index_carry
);
3515 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3516 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
3518 if (DDR_NB_LOOPS (ddr
) > 1)
3520 lambda_vector opposite_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3522 if (!subscript_dependence_tester_1 (ddr
, DDR_B (ddr
),
3523 DDR_A (ddr
), loop_nest
))
3525 compute_subscript_distance (ddr
);
3526 if (!build_classic_dist_vector_1 (ddr
, DDR_B (ddr
), DDR_A (ddr
),
3527 opposite_v
, &init_b
,
3531 save_dist_v (ddr
, save_v
);
3532 add_outer_distances (ddr
, dist_v
, index_carry
);
3533 add_outer_distances (ddr
, opposite_v
, index_carry
);
3536 save_dist_v (ddr
, save_v
);
3541 /* There is a distance of 1 on all the outer loops: Example:
3542 there is a dependence of distance 1 on loop_1 for the array A.
3548 add_outer_distances (ddr
, dist_v
,
3549 lambda_vector_first_nz (dist_v
,
3550 DDR_NB_LOOPS (ddr
), 0));
3553 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3557 fprintf (dump_file
, "(build_classic_dist_vector\n");
3558 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
3560 fprintf (dump_file
, " dist_vector = (");
3561 print_lambda_vector (dump_file
, DDR_DIST_VECT (ddr
, i
),
3562 DDR_NB_LOOPS (ddr
));
3563 fprintf (dump_file
, " )\n");
3565 fprintf (dump_file
, ")\n");
3571 /* Return the direction for a given distance.
3572 FIXME: Computing dir this way is suboptimal, since dir can catch
3573 cases that dist is unable to represent. */
3575 static inline enum data_dependence_direction
3576 dir_from_dist (int dist
)
3579 return dir_positive
;
3581 return dir_negative
;
3586 /* Compute the classic per loop direction vector. DDR is the data
3587 dependence relation to build a vector from. */
3590 build_classic_dir_vector (struct data_dependence_relation
*ddr
)
3593 lambda_vector dist_v
;
3595 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
3597 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3599 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3600 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3602 save_dir_v (ddr
, dir_v
);
3606 /* Helper function. Returns true when there is a dependence between
3607 data references DRA and DRB. */
3610 subscript_dependence_tester_1 (struct data_dependence_relation
*ddr
,
3611 struct data_reference
*dra
,
3612 struct data_reference
*drb
,
3613 struct loop
*loop_nest
)
3616 tree last_conflicts
;
3617 struct subscript
*subscript
;
3618 tree res
= NULL_TREE
;
3620 for (i
= 0; DDR_SUBSCRIPTS (ddr
).iterate (i
, &subscript
); i
++)
3622 conflict_function
*overlaps_a
, *overlaps_b
;
3624 analyze_overlapping_iterations (DR_ACCESS_FN (dra
, i
),
3625 DR_ACCESS_FN (drb
, i
),
3626 &overlaps_a
, &overlaps_b
,
3627 &last_conflicts
, loop_nest
);
3629 if (SUB_CONFLICTS_IN_A (subscript
))
3630 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
3631 if (SUB_CONFLICTS_IN_B (subscript
))
3632 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
3634 SUB_CONFLICTS_IN_A (subscript
) = overlaps_a
;
3635 SUB_CONFLICTS_IN_B (subscript
) = overlaps_b
;
3636 SUB_LAST_CONFLICT (subscript
) = last_conflicts
;
3638 /* If there is any undetermined conflict function we have to
3639 give a conservative answer in case we cannot prove that
3640 no dependence exists when analyzing another subscript. */
3641 if (CF_NOT_KNOWN_P (overlaps_a
)
3642 || CF_NOT_KNOWN_P (overlaps_b
))
3644 res
= chrec_dont_know
;
3648 /* When there is a subscript with no dependence we can stop. */
3649 else if (CF_NO_DEPENDENCE_P (overlaps_a
)
3650 || CF_NO_DEPENDENCE_P (overlaps_b
))
3657 if (res
== NULL_TREE
)
3660 if (res
== chrec_known
)
3661 dependence_stats
.num_dependence_independent
++;
3663 dependence_stats
.num_dependence_undetermined
++;
3664 finalize_ddr_dependent (ddr
, res
);
3668 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3671 subscript_dependence_tester (struct data_dependence_relation
*ddr
,
3672 struct loop
*loop_nest
)
3674 if (subscript_dependence_tester_1 (ddr
, DDR_A (ddr
), DDR_B (ddr
), loop_nest
))
3675 dependence_stats
.num_dependence_dependent
++;
3677 compute_subscript_distance (ddr
);
3678 if (build_classic_dist_vector (ddr
, loop_nest
))
3679 build_classic_dir_vector (ddr
);
3682 /* Returns true when all the access functions of A are affine or
3683 constant with respect to LOOP_NEST. */
3686 access_functions_are_affine_or_constant_p (const struct data_reference
*a
,
3687 const struct loop
*loop_nest
)
3690 vec
<tree
> fns
= DR_ACCESS_FNS (a
);
3693 FOR_EACH_VEC_ELT (fns
, i
, t
)
3694 if (!evolution_function_is_invariant_p (t
, loop_nest
->num
)
3695 && !evolution_function_is_affine_multivariate_p (t
, loop_nest
->num
))
3701 /* Initializes an equation for an OMEGA problem using the information
3702 contained in the ACCESS_FUN. Returns true when the operation
3705 PB is the omega constraint system.
3706 EQ is the number of the equation to be initialized.
3707 OFFSET is used for shifting the variables names in the constraints:
3708 a constrain is composed of 2 * the number of variables surrounding
3709 dependence accesses. OFFSET is set either to 0 for the first n variables,
3710 then it is set to n.
3711 ACCESS_FUN is expected to be an affine chrec. */
3714 init_omega_eq_with_af (omega_pb pb
, unsigned eq
,
3715 unsigned int offset
, tree access_fun
,
3716 struct data_dependence_relation
*ddr
)
3718 switch (TREE_CODE (access_fun
))
3720 case POLYNOMIAL_CHREC
:
3722 tree left
= CHREC_LEFT (access_fun
);
3723 tree right
= CHREC_RIGHT (access_fun
);
3724 int var
= CHREC_VARIABLE (access_fun
);
3727 if (TREE_CODE (right
) != INTEGER_CST
)
3730 var_idx
= index_in_loop_nest (var
, DDR_LOOP_NEST (ddr
));
3731 pb
->eqs
[eq
].coef
[offset
+ var_idx
+ 1] = int_cst_value (right
);
3733 /* Compute the innermost loop index. */
3734 DDR_INNER_LOOP (ddr
) = MAX (DDR_INNER_LOOP (ddr
), var_idx
);
3737 pb
->eqs
[eq
].coef
[var_idx
+ DDR_NB_LOOPS (ddr
) + 1]
3738 += int_cst_value (right
);
3740 switch (TREE_CODE (left
))
3742 case POLYNOMIAL_CHREC
:
3743 return init_omega_eq_with_af (pb
, eq
, offset
, left
, ddr
);
3746 pb
->eqs
[eq
].coef
[0] += int_cst_value (left
);
3755 pb
->eqs
[eq
].coef
[0] += int_cst_value (access_fun
);
3763 /* As explained in the comments preceding init_omega_for_ddr, we have
3764 to set up a system for each loop level, setting outer loops
3765 variation to zero, and current loop variation to positive or zero.
3766 Save each lexico positive distance vector. */
3769 omega_extract_distance_vectors (omega_pb pb
,
3770 struct data_dependence_relation
*ddr
)
3774 struct loop
*loopi
, *loopj
;
3775 enum omega_result res
;
3777 /* Set a new problem for each loop in the nest. The basis is the
3778 problem that we have initialized until now. On top of this we
3779 add new constraints. */
3780 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3781 && DDR_LOOP_NEST (ddr
).iterate (i
, &loopi
); i
++)
3784 omega_pb copy
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
),
3785 DDR_NB_LOOPS (ddr
));
3787 omega_copy_problem (copy
, pb
);
3789 /* For all the outer loops "loop_j", add "dj = 0". */
3790 for (j
= 0; j
< i
&& DDR_LOOP_NEST (ddr
).iterate (j
, &loopj
); j
++)
3792 eq
= omega_add_zero_eq (copy
, omega_black
);
3793 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3796 /* For "loop_i", add "0 <= di". */
3797 geq
= omega_add_zero_geq (copy
, omega_black
);
3798 copy
->geqs
[geq
].coef
[i
+ 1] = 1;
3800 /* Reduce the constraint system, and test that the current
3801 problem is feasible. */
3802 res
= omega_simplify_problem (copy
);
3803 if (res
== omega_false
3804 || res
== omega_unknown
3805 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3808 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3809 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3811 dist
= copy
->subs
[eq
].coef
[0];
3817 /* Reinitialize problem... */
3818 omega_copy_problem (copy
, pb
);
3819 for (j
= 0; j
< i
&& DDR_LOOP_NEST (ddr
).iterate (j
, &loopj
); j
++)
3821 eq
= omega_add_zero_eq (copy
, omega_black
);
3822 copy
->eqs
[eq
].coef
[j
+ 1] = 1;
3825 /* ..., but this time "di = 1". */
3826 eq
= omega_add_zero_eq (copy
, omega_black
);
3827 copy
->eqs
[eq
].coef
[i
+ 1] = 1;
3828 copy
->eqs
[eq
].coef
[0] = -1;
3830 res
= omega_simplify_problem (copy
);
3831 if (res
== omega_false
3832 || res
== omega_unknown
3833 || copy
->num_geqs
> (int) DDR_NB_LOOPS (ddr
))
3836 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3837 if (copy
->subs
[eq
].key
== (int) i
+ 1)
3839 dist
= copy
->subs
[eq
].coef
[0];
3845 /* Save the lexicographically positive distance vector. */
3848 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3849 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
3853 for (eq
= 0; eq
< copy
->num_subs
; eq
++)
3854 if (copy
->subs
[eq
].key
> 0)
3856 dist
= copy
->subs
[eq
].coef
[0];
3857 dist_v
[copy
->subs
[eq
].key
- 1] = dist
;
3860 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
3861 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
3863 save_dist_v (ddr
, dist_v
);
3864 save_dir_v (ddr
, dir_v
);
3868 omega_free_problem (copy
);
3872 /* This is called for each subscript of a tuple of data references:
3873 insert an equality for representing the conflicts. */
3876 omega_setup_subscript (tree access_fun_a
, tree access_fun_b
,
3877 struct data_dependence_relation
*ddr
,
3878 omega_pb pb
, bool *maybe_dependent
)
3881 tree type
= signed_type_for_types (TREE_TYPE (access_fun_a
),
3882 TREE_TYPE (access_fun_b
));
3883 tree fun_a
= chrec_convert (type
, access_fun_a
, NULL
);
3884 tree fun_b
= chrec_convert (type
, access_fun_b
, NULL
);
3885 tree difference
= chrec_fold_minus (type
, fun_a
, fun_b
);
3888 /* When the fun_a - fun_b is not constant, the dependence is not
3889 captured by the classic distance vector representation. */
3890 if (TREE_CODE (difference
) != INTEGER_CST
)
3894 if (ziv_subscript_p (fun_a
, fun_b
) && !integer_zerop (difference
))
3896 /* There is no dependence. */
3897 *maybe_dependent
= false;
3901 minus_one
= build_int_cst (type
, -1);
3902 fun_b
= chrec_fold_multiply (type
, fun_b
, minus_one
);
3904 eq
= omega_add_zero_eq (pb
, omega_black
);
3905 if (!init_omega_eq_with_af (pb
, eq
, DDR_NB_LOOPS (ddr
), fun_a
, ddr
)
3906 || !init_omega_eq_with_af (pb
, eq
, 0, fun_b
, ddr
))
3907 /* There is probably a dependence, but the system of
3908 constraints cannot be built: answer "don't know". */
3912 if (DDR_NB_LOOPS (ddr
) != 0 && pb
->eqs
[eq
].coef
[0]
3913 && !int_divides_p (lambda_vector_gcd
3914 ((lambda_vector
) &(pb
->eqs
[eq
].coef
[1]),
3915 2 * DDR_NB_LOOPS (ddr
)),
3916 pb
->eqs
[eq
].coef
[0]))
3918 /* There is no dependence. */
3919 *maybe_dependent
= false;
3926 /* Helper function, same as init_omega_for_ddr but specialized for
3927 data references A and B. */
3930 init_omega_for_ddr_1 (struct data_reference
*dra
, struct data_reference
*drb
,
3931 struct data_dependence_relation
*ddr
,
3932 omega_pb pb
, bool *maybe_dependent
)
3937 unsigned nb_loops
= DDR_NB_LOOPS (ddr
);
3939 /* Insert an equality per subscript. */
3940 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
3942 if (!omega_setup_subscript (DR_ACCESS_FN (dra
, i
), DR_ACCESS_FN (drb
, i
),
3943 ddr
, pb
, maybe_dependent
))
3945 else if (*maybe_dependent
== false)
3947 /* There is no dependence. */
3948 DDR_ARE_DEPENDENT (ddr
) = chrec_known
;
3953 /* Insert inequalities: constraints corresponding to the iteration
3954 domain, i.e. the loops surrounding the references "loop_x" and
3955 the distance variables "dx". The layout of the OMEGA
3956 representation is as follows:
3957 - coef[0] is the constant
3958 - coef[1..nb_loops] are the protected variables that will not be
3959 removed by the solver: the "dx"
3960 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3962 for (i
= 0; i
<= DDR_INNER_LOOP (ddr
)
3963 && DDR_LOOP_NEST (ddr
).iterate (i
, &loopi
); i
++)
3965 HOST_WIDE_INT nbi
= max_stmt_executions_int (loopi
);
3968 ineq
= omega_add_zero_geq (pb
, omega_black
);
3969 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3971 /* 0 <= loop_x + dx */
3972 ineq
= omega_add_zero_geq (pb
, omega_black
);
3973 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = 1;
3974 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3978 /* loop_x <= nb_iters */
3979 ineq
= omega_add_zero_geq (pb
, omega_black
);
3980 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3981 pb
->geqs
[ineq
].coef
[0] = nbi
;
3983 /* loop_x + dx <= nb_iters */
3984 ineq
= omega_add_zero_geq (pb
, omega_black
);
3985 pb
->geqs
[ineq
].coef
[i
+ nb_loops
+ 1] = -1;
3986 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3987 pb
->geqs
[ineq
].coef
[0] = nbi
;
3989 /* A step "dx" bigger than nb_iters is not feasible, so
3990 add "0 <= nb_iters + dx", */
3991 ineq
= omega_add_zero_geq (pb
, omega_black
);
3992 pb
->geqs
[ineq
].coef
[i
+ 1] = 1;
3993 pb
->geqs
[ineq
].coef
[0] = nbi
;
3994 /* and "dx <= nb_iters". */
3995 ineq
= omega_add_zero_geq (pb
, omega_black
);
3996 pb
->geqs
[ineq
].coef
[i
+ 1] = -1;
3997 pb
->geqs
[ineq
].coef
[0] = nbi
;
4001 omega_extract_distance_vectors (pb
, ddr
);
4006 /* Sets up the Omega dependence problem for the data dependence
4007 relation DDR. Returns false when the constraint system cannot be
4008 built, ie. when the test answers "don't know". Returns true
4009 otherwise, and when independence has been proved (using one of the
4010 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
4011 set MAYBE_DEPENDENT to true.
4013 Example: for setting up the dependence system corresponding to the
4014 conflicting accesses
4019 | ... A[2*j, 2*(i + j)]
4023 the following constraints come from the iteration domain:
4030 where di, dj are the distance variables. The constraints
4031 representing the conflicting elements are:
4034 i + 1 = 2 * (i + di + j + dj)
4036 For asking that the resulting distance vector (di, dj) be
4037 lexicographically positive, we insert the constraint "di >= 0". If
4038 "di = 0" in the solution, we fix that component to zero, and we
4039 look at the inner loops: we set a new problem where all the outer
4040 loop distances are zero, and fix this inner component to be
4041 positive. When one of the components is positive, we save that
4042 distance, and set a new problem where the distance on this loop is
4043 zero, searching for other distances in the inner loops. Here is
4044 the classic example that illustrates that we have to set for each
4045 inner loop a new problem:
4053 we have to save two distances (1, 0) and (0, 1).
4055 Given two array references, refA and refB, we have to set the
4056 dependence problem twice, refA vs. refB and refB vs. refA, and we
4057 cannot do a single test, as refB might occur before refA in the
4058 inner loops, and the contrary when considering outer loops: ex.
4063 | T[{1,+,1}_2][{1,+,1}_1] // refA
4064 | T[{2,+,1}_2][{0,+,1}_1] // refB
4069 refB touches the elements in T before refA, and thus for the same
4070 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
4071 but for successive loop_0 iterations, we have (1, -1, 1)
4073 The Omega solver expects the distance variables ("di" in the
4074 previous example) to come first in the constraint system (as
4075 variables to be protected, or "safe" variables), the constraint
4076 system is built using the following layout:
4078 "cst | distance vars | index vars".
4082 init_omega_for_ddr (struct data_dependence_relation
*ddr
,
4083 bool *maybe_dependent
)
4088 *maybe_dependent
= true;
4090 if (same_access_functions (ddr
))
4093 lambda_vector dir_v
;
4095 /* Save the 0 vector. */
4096 save_dist_v (ddr
, lambda_vector_new (DDR_NB_LOOPS (ddr
)));
4097 dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4098 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
4099 dir_v
[j
] = dir_equal
;
4100 save_dir_v (ddr
, dir_v
);
4102 /* Save the dependences carried by outer loops. */
4103 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
4104 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
4106 omega_free_problem (pb
);
4110 /* Omega expects the protected variables (those that have to be kept
4111 after elimination) to appear first in the constraint system.
4112 These variables are the distance variables. In the following
4113 initialization we declare NB_LOOPS safe variables, and the total
4114 number of variables for the constraint system is 2*NB_LOOPS. */
4115 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
4116 res
= init_omega_for_ddr_1 (DDR_A (ddr
), DDR_B (ddr
), ddr
, pb
,
4118 omega_free_problem (pb
);
4120 /* Stop computation if not decidable, or no dependence. */
4121 if (res
== false || *maybe_dependent
== false)
4124 pb
= omega_alloc_problem (2 * DDR_NB_LOOPS (ddr
), DDR_NB_LOOPS (ddr
));
4125 res
= init_omega_for_ddr_1 (DDR_B (ddr
), DDR_A (ddr
), ddr
, pb
,
4127 omega_free_problem (pb
);
4132 /* Return true when DDR contains the same information as that stored
4133 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
4136 ddr_consistent_p (FILE *file
,
4137 struct data_dependence_relation
*ddr
,
4138 vec
<lambda_vector
> dist_vects
,
4139 vec
<lambda_vector
> dir_vects
)
4143 /* If dump_file is set, output there. */
4144 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4147 if (dist_vects
.length () != DDR_NUM_DIST_VECTS (ddr
))
4149 lambda_vector b_dist_v
;
4150 fprintf (file
, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4151 dist_vects
.length (),
4152 DDR_NUM_DIST_VECTS (ddr
));
4154 fprintf (file
, "Banerjee dist vectors:\n");
4155 FOR_EACH_VEC_ELT (dist_vects
, i
, b_dist_v
)
4156 print_lambda_vector (file
, b_dist_v
, DDR_NB_LOOPS (ddr
));
4158 fprintf (file
, "Omega dist vectors:\n");
4159 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
4160 print_lambda_vector (file
, DDR_DIST_VECT (ddr
, i
), DDR_NB_LOOPS (ddr
));
4162 fprintf (file
, "data dependence relation:\n");
4163 dump_data_dependence_relation (file
, ddr
);
4165 fprintf (file
, ")\n");
4169 if (dir_vects
.length () != DDR_NUM_DIR_VECTS (ddr
))
4171 fprintf (file
, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4172 dir_vects
.length (),
4173 DDR_NUM_DIR_VECTS (ddr
));
4177 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
4179 lambda_vector a_dist_v
;
4180 lambda_vector b_dist_v
= DDR_DIST_VECT (ddr
, i
);
4182 /* Distance vectors are not ordered in the same way in the DDR
4183 and in the DIST_VECTS: search for a matching vector. */
4184 FOR_EACH_VEC_ELT (dist_vects
, j
, a_dist_v
)
4185 if (lambda_vector_equal (a_dist_v
, b_dist_v
, DDR_NB_LOOPS (ddr
)))
4188 if (j
== dist_vects
.length ())
4190 fprintf (file
, "\n(Dist vectors from the first dependence analyzer:\n");
4191 print_dist_vectors (file
, dist_vects
, DDR_NB_LOOPS (ddr
));
4192 fprintf (file
, "not found in Omega dist vectors:\n");
4193 print_dist_vectors (file
, DDR_DIST_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
4194 fprintf (file
, "data dependence relation:\n");
4195 dump_data_dependence_relation (file
, ddr
);
4196 fprintf (file
, ")\n");
4200 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
4202 lambda_vector a_dir_v
;
4203 lambda_vector b_dir_v
= DDR_DIR_VECT (ddr
, i
);
4205 /* Direction vectors are not ordered in the same way in the DDR
4206 and in the DIR_VECTS: search for a matching vector. */
4207 FOR_EACH_VEC_ELT (dir_vects
, j
, a_dir_v
)
4208 if (lambda_vector_equal (a_dir_v
, b_dir_v
, DDR_NB_LOOPS (ddr
)))
4211 if (j
== dist_vects
.length ())
4213 fprintf (file
, "\n(Dir vectors from the first dependence analyzer:\n");
4214 print_dir_vectors (file
, dir_vects
, DDR_NB_LOOPS (ddr
));
4215 fprintf (file
, "not found in Omega dir vectors:\n");
4216 print_dir_vectors (file
, DDR_DIR_VECTS (ddr
), DDR_NB_LOOPS (ddr
));
4217 fprintf (file
, "data dependence relation:\n");
4218 dump_data_dependence_relation (file
, ddr
);
4219 fprintf (file
, ")\n");
4226 /* This computes the affine dependence relation between A and B with
4227 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4228 independence between two accesses, while CHREC_DONT_KNOW is used
4229 for representing the unknown relation.
4231 Note that it is possible to stop the computation of the dependence
4232 relation the first time we detect a CHREC_KNOWN element for a given
4236 compute_affine_dependence (struct data_dependence_relation
*ddr
,
4237 struct loop
*loop_nest
)
4239 struct data_reference
*dra
= DDR_A (ddr
);
4240 struct data_reference
*drb
= DDR_B (ddr
);
4242 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4244 fprintf (dump_file
, "(compute_affine_dependence\n");
4245 fprintf (dump_file
, " stmt_a: ");
4246 print_gimple_stmt (dump_file
, DR_STMT (dra
), 0, TDF_SLIM
);
4247 fprintf (dump_file
, " stmt_b: ");
4248 print_gimple_stmt (dump_file
, DR_STMT (drb
), 0, TDF_SLIM
);
4251 /* Analyze only when the dependence relation is not yet known. */
4252 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4254 dependence_stats
.num_dependence_tests
++;
4256 if (access_functions_are_affine_or_constant_p (dra
, loop_nest
)
4257 && access_functions_are_affine_or_constant_p (drb
, loop_nest
))
4259 subscript_dependence_tester (ddr
, loop_nest
);
4261 if (flag_check_data_deps
)
4263 /* Dump the dependences from the first algorithm. */
4264 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4266 fprintf (dump_file
, "\n\nBanerjee Analyzer\n");
4267 dump_data_dependence_relation (dump_file
, ddr
);
4270 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4272 bool maybe_dependent
;
4273 vec
<lambda_vector
> dir_vects
, dist_vects
;
4275 /* Save the result of the first DD analyzer. */
4276 dist_vects
= DDR_DIST_VECTS (ddr
);
4277 dir_vects
= DDR_DIR_VECTS (ddr
);
4279 /* Reset the information. */
4280 DDR_DIST_VECTS (ddr
).create (0);
4281 DDR_DIR_VECTS (ddr
).create (0);
4283 /* Compute the same information using Omega. */
4284 if (!init_omega_for_ddr (ddr
, &maybe_dependent
))
4285 goto csys_dont_know
;
4287 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4289 fprintf (dump_file
, "Omega Analyzer\n");
4290 dump_data_dependence_relation (dump_file
, ddr
);
4293 /* Check that we get the same information. */
4294 if (maybe_dependent
)
4295 gcc_assert (ddr_consistent_p (stderr
, ddr
, dist_vects
,
4301 /* As a last case, if the dependence cannot be determined, or if
4302 the dependence is considered too difficult to determine, answer
4307 dependence_stats
.num_dependence_undetermined
++;
4309 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4311 fprintf (dump_file
, "Data ref a:\n");
4312 dump_data_reference (dump_file
, dra
);
4313 fprintf (dump_file
, "Data ref b:\n");
4314 dump_data_reference (dump_file
, drb
);
4315 fprintf (dump_file
, "affine dependence test not usable: access function not affine or constant.\n");
4317 finalize_ddr_dependent (ddr
, chrec_dont_know
);
4321 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4323 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
4324 fprintf (dump_file
, ") -> no dependence\n");
4325 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
4326 fprintf (dump_file
, ") -> dependence analysis failed\n");
4328 fprintf (dump_file
, ")\n");
4332 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4333 the data references in DATAREFS, in the LOOP_NEST. When
4334 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4335 relations. Return true when successful, i.e. data references number
4336 is small enough to be handled. */
4339 compute_all_dependences (vec
<data_reference_p
> datarefs
,
4340 vec
<ddr_p
> *dependence_relations
,
4341 vec
<loop_p
> loop_nest
,
4342 bool compute_self_and_rr
)
4344 struct data_dependence_relation
*ddr
;
4345 struct data_reference
*a
, *b
;
4348 if ((int) datarefs
.length ()
4349 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS
))
4351 struct data_dependence_relation
*ddr
;
4353 /* Insert a single relation into dependence_relations:
4355 ddr
= initialize_data_dependence_relation (NULL
, NULL
, loop_nest
);
4356 dependence_relations
->safe_push (ddr
);
4360 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
4361 for (j
= i
+ 1; datarefs
.iterate (j
, &b
); j
++)
4362 if (DR_IS_WRITE (a
) || DR_IS_WRITE (b
) || compute_self_and_rr
)
4364 ddr
= initialize_data_dependence_relation (a
, b
, loop_nest
);
4365 dependence_relations
->safe_push (ddr
);
4366 if (loop_nest
.exists ())
4367 compute_affine_dependence (ddr
, loop_nest
[0]);
4370 if (compute_self_and_rr
)
4371 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
4373 ddr
= initialize_data_dependence_relation (a
, a
, loop_nest
);
4374 dependence_relations
->safe_push (ddr
);
4375 if (loop_nest
.exists ())
4376 compute_affine_dependence (ddr
, loop_nest
[0]);
4382 /* Describes a location of a memory reference. */
4384 typedef struct data_ref_loc_d
4386 /* The memory reference. */
4389 /* True if the memory reference is read. */
4394 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4395 true if STMT clobbers memory, false otherwise. */
4398 get_references_in_stmt (gimple stmt
, vec
<data_ref_loc
, va_heap
> *references
)
4400 bool clobbers_memory
= false;
4403 enum gimple_code stmt_code
= gimple_code (stmt
);
4405 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4406 As we cannot model data-references to not spelled out
4407 accesses give up if they may occur. */
4408 if (stmt_code
== GIMPLE_CALL
4409 && !(gimple_call_flags (stmt
) & ECF_CONST
))
4411 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
4412 if (gimple_call_internal_p (stmt
))
4413 switch (gimple_call_internal_fn (stmt
))
4415 case IFN_GOMP_SIMD_LANE
:
4417 struct loop
*loop
= gimple_bb (stmt
)->loop_father
;
4418 tree uid
= gimple_call_arg (stmt
, 0);
4419 gcc_assert (TREE_CODE (uid
) == SSA_NAME
);
4421 || loop
->simduid
!= SSA_NAME_VAR (uid
))
4422 clobbers_memory
= true;
4426 case IFN_MASK_STORE
:
4429 clobbers_memory
= true;
4433 clobbers_memory
= true;
4435 else if (stmt_code
== GIMPLE_ASM
4436 && (gimple_asm_volatile_p (as_a
<gasm
*> (stmt
))
4437 || gimple_vuse (stmt
)))
4438 clobbers_memory
= true;
4440 if (!gimple_vuse (stmt
))
4441 return clobbers_memory
;
4443 if (stmt_code
== GIMPLE_ASSIGN
)
4446 op0
= gimple_assign_lhs (stmt
);
4447 op1
= gimple_assign_rhs1 (stmt
);
4450 || (REFERENCE_CLASS_P (op1
)
4451 && (base
= get_base_address (op1
))
4452 && TREE_CODE (base
) != SSA_NAME
))
4456 references
->safe_push (ref
);
4459 else if (stmt_code
== GIMPLE_CALL
)
4463 ref
.is_read
= false;
4464 if (gimple_call_internal_p (stmt
))
4465 switch (gimple_call_internal_fn (stmt
))
4468 if (gimple_call_lhs (stmt
) == NULL_TREE
)
4471 case IFN_MASK_STORE
:
4472 ref
.ref
= fold_build2 (MEM_REF
,
4474 ? TREE_TYPE (gimple_call_lhs (stmt
))
4475 : TREE_TYPE (gimple_call_arg (stmt
, 3)),
4476 gimple_call_arg (stmt
, 0),
4477 gimple_call_arg (stmt
, 1));
4478 references
->safe_push (ref
);
4484 op0
= gimple_call_lhs (stmt
);
4485 n
= gimple_call_num_args (stmt
);
4486 for (i
= 0; i
< n
; i
++)
4488 op1
= gimple_call_arg (stmt
, i
);
4491 || (REFERENCE_CLASS_P (op1
) && get_base_address (op1
)))
4495 references
->safe_push (ref
);
4500 return clobbers_memory
;
4504 || (REFERENCE_CLASS_P (op0
) && get_base_address (op0
))))
4507 ref
.is_read
= false;
4508 references
->safe_push (ref
);
4510 return clobbers_memory
;
4513 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4514 reference, returns false, otherwise returns true. NEST is the outermost
4515 loop of the loop nest in which the references should be analyzed. */
4518 find_data_references_in_stmt (struct loop
*nest
, gimple stmt
,
4519 vec
<data_reference_p
> *datarefs
)
4522 auto_vec
<data_ref_loc
, 2> references
;
4525 data_reference_p dr
;
4527 if (get_references_in_stmt (stmt
, &references
))
4530 FOR_EACH_VEC_ELT (references
, i
, ref
)
4532 dr
= create_data_ref (nest
, loop_containing_stmt (stmt
),
4533 ref
->ref
, stmt
, ref
->is_read
);
4534 gcc_assert (dr
!= NULL
);
4535 datarefs
->safe_push (dr
);
4537 references
.release ();
4541 /* Stores the data references in STMT to DATAREFS. If there is an
4542 unanalyzable reference, returns false, otherwise returns true.
4543 NEST is the outermost loop of the loop nest in which the references
4544 should be instantiated, LOOP is the loop in which the references
4545 should be analyzed. */
4548 graphite_find_data_references_in_stmt (loop_p nest
, loop_p loop
, gimple stmt
,
4549 vec
<data_reference_p
> *datarefs
)
4552 auto_vec
<data_ref_loc
, 2> references
;
4555 data_reference_p dr
;
4557 if (get_references_in_stmt (stmt
, &references
))
4560 FOR_EACH_VEC_ELT (references
, i
, ref
)
4562 dr
= create_data_ref (nest
, loop
, ref
->ref
, stmt
, ref
->is_read
);
4563 gcc_assert (dr
!= NULL
);
4564 datarefs
->safe_push (dr
);
4567 references
.release ();
4571 /* Search the data references in LOOP, and record the information into
4572 DATAREFS. Returns chrec_dont_know when failing to analyze a
4573 difficult case, returns NULL_TREE otherwise. */
4576 find_data_references_in_bb (struct loop
*loop
, basic_block bb
,
4577 vec
<data_reference_p
> *datarefs
)
4579 gimple_stmt_iterator bsi
;
4581 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
4583 gimple stmt
= gsi_stmt (bsi
);
4585 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
4587 struct data_reference
*res
;
4588 res
= XCNEW (struct data_reference
);
4589 datarefs
->safe_push (res
);
4591 return chrec_dont_know
;
4598 /* Search the data references in LOOP, and record the information into
4599 DATAREFS. Returns chrec_dont_know when failing to analyze a
4600 difficult case, returns NULL_TREE otherwise.
4602 TODO: This function should be made smarter so that it can handle address
4603 arithmetic as if they were array accesses, etc. */
4606 find_data_references_in_loop (struct loop
*loop
,
4607 vec
<data_reference_p
> *datarefs
)
4609 basic_block bb
, *bbs
;
4612 bbs
= get_loop_body_in_dom_order (loop
);
4614 for (i
= 0; i
< loop
->num_nodes
; i
++)
4618 if (find_data_references_in_bb (loop
, bb
, datarefs
) == chrec_dont_know
)
4621 return chrec_dont_know
;
4629 /* Recursive helper function. */
4632 find_loop_nest_1 (struct loop
*loop
, vec
<loop_p
> *loop_nest
)
4634 /* Inner loops of the nest should not contain siblings. Example:
4635 when there are two consecutive loops,
4646 the dependence relation cannot be captured by the distance
4651 loop_nest
->safe_push (loop
);
4653 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4657 /* Return false when the LOOP is not well nested. Otherwise return
4658 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4659 contain the loops from the outermost to the innermost, as they will
4660 appear in the classic distance vector. */
4663 find_loop_nest (struct loop
*loop
, vec
<loop_p
> *loop_nest
)
4665 loop_nest
->safe_push (loop
);
4667 return find_loop_nest_1 (loop
->inner
, loop_nest
);
4671 /* Returns true when the data dependences have been computed, false otherwise.
4672 Given a loop nest LOOP, the following vectors are returned:
4673 DATAREFS is initialized to all the array elements contained in this loop,
4674 DEPENDENCE_RELATIONS contains the relations between the data references.
4675 Compute read-read and self relations if
4676 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4679 compute_data_dependences_for_loop (struct loop
*loop
,
4680 bool compute_self_and_read_read_dependences
,
4681 vec
<loop_p
> *loop_nest
,
4682 vec
<data_reference_p
> *datarefs
,
4683 vec
<ddr_p
> *dependence_relations
)
4687 memset (&dependence_stats
, 0, sizeof (dependence_stats
));
4689 /* If the loop nest is not well formed, or one of the data references
4690 is not computable, give up without spending time to compute other
4693 || !find_loop_nest (loop
, loop_nest
)
4694 || find_data_references_in_loop (loop
, datarefs
) == chrec_dont_know
4695 || !compute_all_dependences (*datarefs
, dependence_relations
, *loop_nest
,
4696 compute_self_and_read_read_dependences
))
4699 if (dump_file
&& (dump_flags
& TDF_STATS
))
4701 fprintf (dump_file
, "Dependence tester statistics:\n");
4703 fprintf (dump_file
, "Number of dependence tests: %d\n",
4704 dependence_stats
.num_dependence_tests
);
4705 fprintf (dump_file
, "Number of dependence tests classified dependent: %d\n",
4706 dependence_stats
.num_dependence_dependent
);
4707 fprintf (dump_file
, "Number of dependence tests classified independent: %d\n",
4708 dependence_stats
.num_dependence_independent
);
4709 fprintf (dump_file
, "Number of undetermined dependence tests: %d\n",
4710 dependence_stats
.num_dependence_undetermined
);
4712 fprintf (dump_file
, "Number of subscript tests: %d\n",
4713 dependence_stats
.num_subscript_tests
);
4714 fprintf (dump_file
, "Number of undetermined subscript tests: %d\n",
4715 dependence_stats
.num_subscript_undetermined
);
4716 fprintf (dump_file
, "Number of same subscript function: %d\n",
4717 dependence_stats
.num_same_subscript_function
);
4719 fprintf (dump_file
, "Number of ziv tests: %d\n",
4720 dependence_stats
.num_ziv
);
4721 fprintf (dump_file
, "Number of ziv tests returning dependent: %d\n",
4722 dependence_stats
.num_ziv_dependent
);
4723 fprintf (dump_file
, "Number of ziv tests returning independent: %d\n",
4724 dependence_stats
.num_ziv_independent
);
4725 fprintf (dump_file
, "Number of ziv tests unimplemented: %d\n",
4726 dependence_stats
.num_ziv_unimplemented
);
4728 fprintf (dump_file
, "Number of siv tests: %d\n",
4729 dependence_stats
.num_siv
);
4730 fprintf (dump_file
, "Number of siv tests returning dependent: %d\n",
4731 dependence_stats
.num_siv_dependent
);
4732 fprintf (dump_file
, "Number of siv tests returning independent: %d\n",
4733 dependence_stats
.num_siv_independent
);
4734 fprintf (dump_file
, "Number of siv tests unimplemented: %d\n",
4735 dependence_stats
.num_siv_unimplemented
);
4737 fprintf (dump_file
, "Number of miv tests: %d\n",
4738 dependence_stats
.num_miv
);
4739 fprintf (dump_file
, "Number of miv tests returning dependent: %d\n",
4740 dependence_stats
.num_miv_dependent
);
4741 fprintf (dump_file
, "Number of miv tests returning independent: %d\n",
4742 dependence_stats
.num_miv_independent
);
4743 fprintf (dump_file
, "Number of miv tests unimplemented: %d\n",
4744 dependence_stats
.num_miv_unimplemented
);
4750 /* Returns true when the data dependences for the basic block BB have been
4751 computed, false otherwise.
4752 DATAREFS is initialized to all the array elements contained in this basic
4753 block, DEPENDENCE_RELATIONS contains the relations between the data
4754 references. Compute read-read and self relations if
4755 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4757 compute_data_dependences_for_bb (basic_block bb
,
4758 bool compute_self_and_read_read_dependences
,
4759 vec
<data_reference_p
> *datarefs
,
4760 vec
<ddr_p
> *dependence_relations
)
4762 if (find_data_references_in_bb (NULL
, bb
, datarefs
) == chrec_dont_know
)
4765 return compute_all_dependences (*datarefs
, dependence_relations
, vNULL
,
4766 compute_self_and_read_read_dependences
);
4769 /* Entry point (for testing only). Analyze all the data references
4770 and the dependence relations in LOOP.
4772 The data references are computed first.
4774 A relation on these nodes is represented by a complete graph. Some
4775 of the relations could be of no interest, thus the relations can be
4778 In the following function we compute all the relations. This is
4779 just a first implementation that is here for:
4780 - for showing how to ask for the dependence relations,
4781 - for the debugging the whole dependence graph,
4782 - for the dejagnu testcases and maintenance.
4784 It is possible to ask only for a part of the graph, avoiding to
4785 compute the whole dependence graph. The computed dependences are
4786 stored in a knowledge base (KB) such that later queries don't
4787 recompute the same information. The implementation of this KB is
4788 transparent to the optimizer, and thus the KB can be changed with a
4789 more efficient implementation, or the KB could be disabled. */
4791 analyze_all_data_dependences (struct loop
*loop
)
4794 int nb_data_refs
= 10;
4795 vec
<data_reference_p
> datarefs
;
4796 datarefs
.create (nb_data_refs
);
4797 vec
<ddr_p
> dependence_relations
;
4798 dependence_relations
.create (nb_data_refs
* nb_data_refs
);
4799 vec
<loop_p
> loop_nest
;
4800 loop_nest
.create (3);
4802 /* Compute DDs on the whole function. */
4803 compute_data_dependences_for_loop (loop
, false, &loop_nest
, &datarefs
,
4804 &dependence_relations
);
4808 dump_data_dependence_relations (dump_file
, dependence_relations
);
4809 fprintf (dump_file
, "\n\n");
4811 if (dump_flags
& TDF_DETAILS
)
4812 dump_dist_dir_vectors (dump_file
, dependence_relations
);
4814 if (dump_flags
& TDF_STATS
)
4816 unsigned nb_top_relations
= 0;
4817 unsigned nb_bot_relations
= 0;
4818 unsigned nb_chrec_relations
= 0;
4819 struct data_dependence_relation
*ddr
;
4821 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
4823 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr
)))
4826 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
4830 nb_chrec_relations
++;
4833 gather_stats_on_scev_database ();
4837 loop_nest
.release ();
4838 free_dependence_relations (dependence_relations
);
4839 free_data_refs (datarefs
);
4842 /* Computes all the data dependences and check that the results of
4843 several analyzers are the same. */
4846 tree_check_data_deps (void)
4848 struct loop
*loop_nest
;
4850 FOR_EACH_LOOP (loop_nest
, 0)
4851 analyze_all_data_dependences (loop_nest
);
4854 /* Free the memory used by a data dependence relation DDR. */
4857 free_dependence_relation (struct data_dependence_relation
*ddr
)
4862 if (DDR_SUBSCRIPTS (ddr
).exists ())
4863 free_subscripts (DDR_SUBSCRIPTS (ddr
));
4864 DDR_DIST_VECTS (ddr
).release ();
4865 DDR_DIR_VECTS (ddr
).release ();
4870 /* Free the memory used by the data dependence relations from
4871 DEPENDENCE_RELATIONS. */
4874 free_dependence_relations (vec
<ddr_p
> dependence_relations
)
4877 struct data_dependence_relation
*ddr
;
4879 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
4881 free_dependence_relation (ddr
);
4883 dependence_relations
.release ();
4886 /* Free the memory used by the data references from DATAREFS. */
4889 free_data_refs (vec
<data_reference_p
> datarefs
)
4892 struct data_reference
*dr
;
4894 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
4896 datarefs
.release ();